-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalc_distance_reverse.cu
110 lines (91 loc) · 3.34 KB
/
calc_distance_reverse.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#include <stdio.h>
#include <stdlib.h>
class v_point
{
public:
long position;
int red;
int blue;
int green;
};
#define BLOCK_SIZE 24
__global__ void cu_calc_dist(v_point *pixels_d, v_point *centers_d, long array_size, long centers_size)
{
long i = blockIdx.x * BLOCK_SIZE + threadIdx.x;
long j = 0;
long x_point, y_point, x_center, y_center;
float distance, shortest_dist;
v_point closest_center;
if(i < array_size*array_size){
x_point = pixels_d[i].position / array_size;
y_point = pixels_d[i].position % array_size;
for(j = 0; j < centers_size; j++){
x_center = centers_d[j].position / array_size;
y_center = centers_d[j].position % array_size;
distance = sqrt(pow(1.0 * x_center - x_point, 2) + pow(1.0 * y_center - y_point, 2));
if(j == 0){
shortest_dist = distance;
closest_center.position = centers_d[j].position;
closest_center.red = centers_d[j].red;
closest_center.blue = centers_d[j].blue;
closest_center.green = centers_d[j].green;
// if not, then check to see if the new distance we calculated is smaller
// note this produces a first calculated point for contested areas
} else if(distance > shortest_dist){
shortest_dist = distance;
closest_center.position = centers_d[j].position;
closest_center.red = centers_d[j].red;
closest_center.blue = centers_d[j].blue;
closest_center.green = centers_d[j].green;
}
}
pixels_d[i] = closest_center;
}
}
// This function is called from the host computer.
// It manages memory and calls the function that is executed on the GPU
extern void calc_distance(v_point *pixels, v_point *centers, long array_size, long centers_size)
{
// build GPU counterpart for each class array on host
v_point *pixels_d;
v_point *centers_d;
cudaError_t result;
// allocate space in the device
result = cudaMalloc ((void**) &pixels_d, sizeof(v_point) * array_size * array_size);
if (result != cudaSuccess) {
fprintf(stderr, "cudaMalloc - 'pixels' failed.");
exit(1);
}
result = cudaMalloc ((void**) ¢ers_d, sizeof(v_point) * centers_size);
if (result != cudaSuccess) {
fprintf(stderr, "cudaMalloc - 'centers' failed.");
exit(1);
}
//copy the array from host to *_d in the device
result = cudaMemcpy (pixels_d, pixels, sizeof(v_point) * array_size * array_size, cudaMemcpyHostToDevice);
if (result != cudaSuccess) {
fprintf(stderr, "cudaMemcpy - 'pixels' failed.");
exit(1);
}
result = cudaMemcpy (centers_d, centers, sizeof(v_point) * centers_size, cudaMemcpyHostToDevice);
if (result != cudaSuccess) {
fprintf(stderr, "cudaMemcpy - 'centers' failed.");
exit(1);
}
// set execution configuration
dim3 dimblock (BLOCK_SIZE);
dim3 dimgrid (ceil((float) array_size*array_size/BLOCK_SIZE));
// actual computation: Call the kernel
cu_calc_dist <<<dimgrid, dimblock>>> (pixels_d, centers_d, array_size, centers_size);
// transfer results back to host
result = cudaMemcpy (pixels, pixels_d, sizeof(v_point) * array_size * array_size, cudaMemcpyDeviceToHost);
if (result != cudaSuccess) {
fprintf(stderr, "cudaMemcpy copy to host failed. %s\n", cudaGetErrorString(result));
exit(1);
cudaFree(pixels_d);
cudaFree(centers_d);
}
// release the memory on the GPU
cudaFree(pixels_d);
cudaFree(centers_d);
}