Browse Source

Bug fix for device memory freeing, less output

master
Apostolos Fanakis 7 years ago
parent
commit
504690c534
  1. 2
      mean_shift_cuda/meanshift.cu
  2. 70
      mean_shift_cuda/meanshift_gpu_utils.cu
  3. 8
      mean_shift_cuda/meanshift_gpu_utils.h

2
mean_shift_cuda/meanshift.cu

@ -30,7 +30,7 @@ int main(int argc, char **argv){
// tic
gettimeofday (&startwtime, NULL);
iterations = meanshift(vectors, &shifted_points, DEVIATION, &params);
iterations = meanshift(vectors, &shifted_points, DEVIATION);
// toc
gettimeofday (&endwtime, NULL);

70
mean_shift_cuda/meanshift_gpu_utils.cu

@ -12,7 +12,7 @@
cudaDeviceProp device_properties;
struct timeval start, end;
struct timeval start_w_time, end_w_time;
double seq;
//Based on:
@ -50,13 +50,13 @@ void set_GPU(){
}
}
int meanshift(double **original_points, double ***shifted_points, int deviation
, parameters *opt){
int meanshift(double **original_points, double ***shifted_points, int deviation){
// host variables
int size = 0;
static int iteration = 0;
static double **kernel_matrix, **mean_shift_vector;
double **new_shift, current_norm = 0;
static double **kernel_matrix, **mean_shift_vector, w_memcpy_time;
double **new_shift, current_norm = 0, tmp_w_memcpy_time;
bool is_first_iteration = false;
// device variables
static Matrix d_original_points, d_shifted_points, d_kernel_matrix, d_denominator,
@ -65,6 +65,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
// allocates memory and copies original points on first iteration
if (iteration == 0 || (*shifted_points) == NULL){
is_first_iteration = true;
// allocates memory for shifted points array and copies original points into it
(*shifted_points) = alloc_double(NUMBER_OF_POINTS, DIMENSIONS);
duplicate(original_points, NUMBER_OF_POINTS, DIMENSIONS, shifted_points);
@ -82,24 +83,26 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
kernel_matrix = alloc_double(NUMBER_OF_POINTS, NUMBER_OF_POINTS);
// tic
gettimeofday (&start, NULL);
gettimeofday (&start_w_time, NULL);
// allocates corresponding memory in device
init_device_memory(original_points, *shifted_points, &d_original_points, &d_shifted_points,
&d_kernel_matrix, &d_denominator, &d_mean_shift_vector);
// toc
gettimeofday (&end, NULL);
seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
gettimeofday (&end_w_time, NULL);
seq = (double)((end_w_time.tv_usec - start_w_time.tv_usec)
/ 1.0e6 + end_w_time.tv_sec - start_w_time.tv_sec);
// printf("%s wall clock time = %f\n","Device memory allocation", seq);
// to create output data file
printf("%f ", seq);
if (params.verbose){
printf("Device memory allocation wall clock time = %f\n\n", seq);
}
}
// finds pairwise distance matrix (inside radius)
// [I, D] = rangesearch(x,y,h);
calculate_kernel_matrix(d_shifted_points, d_original_points, d_kernel_matrix, deviation,
&kernel_matrix);
&kernel_matrix, &tmp_w_memcpy_time);
w_memcpy_time += tmp_w_memcpy_time;
// calculates denominator
calculate_denominator(d_kernel_matrix, d_denominator);
@ -114,7 +117,9 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
gpuErrchk( cudaMalloc(&(d_new_shift.elements), size) );
shift_points(d_kernel_matrix, d_original_points, d_shifted_points, d_new_shift, d_denominator,
d_mean_shift_vector, kernel_matrix, original_points, &new_shift, &mean_shift_vector);
d_mean_shift_vector, kernel_matrix, original_points, &new_shift, &mean_shift_vector,
&tmp_w_memcpy_time);
w_memcpy_time += tmp_w_memcpy_time;
// frees previously shifted points, they're now garbage
free((*shifted_points)[0]);
@ -147,12 +152,16 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
}
// iterates until convergence
if (current_norm > opt->epsilon) {
if (current_norm > params.epsilon) {
++iteration;
meanshift(original_points, shifted_points, deviation, opt);
meanshift(original_points, shifted_points, deviation);
}
if (iteration == 0){
if (is_first_iteration){
if (params.verbose){
printf("\nCopying between device and host wall clock time = %f\n", w_memcpy_time);
}
// cleans up allocations
free(mean_shift_vector[0]);
free(mean_shift_vector);
@ -206,7 +215,7 @@ void init_device_memory(double **original_points, double **shifted_points,
}
void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points,
Matrix d_kernel_matrix, double deviation, double ***kernel_matrix){
Matrix d_kernel_matrix, double deviation, double ***kernel_matrix, double *w_memcpy_time){
int size;
static bool first_iter = true;
// gets max block size supported from the device
@ -242,17 +251,15 @@ void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points,
size = NUMBER_OF_POINTS * NUMBER_OF_POINTS * sizeof(double);
// tic
gettimeofday (&start, NULL);
gettimeofday (&start_w_time, NULL);
gpuErrchk( cudaMemcpy(&((*kernel_matrix)[0][0]), d_kernel_matrix.elements
, size, cudaMemcpyDeviceToHost) );
// toc
gettimeofday (&end, NULL);
seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
// printf("%s wall clock time = %f\n","Copying from device to host", seq);
// to create output data file
printf("%f ", seq);
gettimeofday (&end_w_time, NULL);
*w_memcpy_time = (double)((end_w_time.tv_usec - start_w_time.tv_usec)
/ 1.0e6 + end_w_time.tv_sec - start_w_time.tv_sec);
}
void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator){
@ -289,7 +296,8 @@ void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator){
void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shifted_points,
Matrix d_new_shift, Matrix d_denominator, Matrix d_mean_shift_vector, double **kernel_matrix,
double **original_points, double ***new_shift, double ***mean_shift_vector){
double **original_points, double ***new_shift, double ***mean_shift_vector,
double *w_memcpy_time){
int size;
static bool first_iter = true;
// gets max block size supported from the device
@ -325,7 +333,7 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi
size = NUMBER_OF_POINTS * DIMENSIONS * sizeof(double);
// tic
gettimeofday (&start, NULL);
gettimeofday (&start_w_time, NULL);
gpuErrchk( cudaMemcpy(&((*new_shift)[0][0]), d_new_shift.elements
, size, cudaMemcpyDeviceToHost) );
@ -333,13 +341,9 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi
, size, cudaMemcpyDeviceToHost) );
// toc
gettimeofday (&end, NULL);
seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
// printf("%s wall clock time = %f\n","Copying from device to host", seq);
// to create output data file
printf("%f ", seq);
gettimeofday (&end_w_time, NULL);
*w_memcpy_time = (double)((end_w_time.tv_usec - start_w_time.tv_usec)
/ 1.0e6 + end_w_time.tv_sec - start_w_time.tv_sec);
}
void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator,

8
mean_shift_cuda/meanshift_gpu_utils.h

@ -28,8 +28,7 @@ void set_GPU();
//Function meanshift recursively shifts original points according to the mean-shift algorithm saving
//the result to shiftedPoints. Struct opt has user options, h is the desirable deviation
int meanshift(double **original_points, double ***shifted_points, int h
, Parameters *opt);
int meanshift(double **original_points, double ***shifted_points, int h);
//Function init_device_memory allocates memory for necessary arrays in the device
void init_device_memory(double **original_points, double **shifted_points,
@ -39,7 +38,7 @@ void init_device_memory(double **original_points, double **shifted_points,
//Function calculate_kernel_matrix is a wrapper for the kernel call of the corresponding kernel
//"calculate_kernel_matrix_kernel" that calculates the kernel matrix
void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points,
Matrix d_kernel_matrix, double deviation, double ***kernel_matrix);
Matrix d_kernel_matrix, double deviation, double ***kernel_matrix, double *w_memcpy_time);
//Function calculate_denominator is a wrapper for the kernel call of the corresponding kernel
//"calculate_denominator_kernel" that calculates the denominator of shifted points fraction
@ -49,7 +48,8 @@ void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator);
//"shift_points_kernel" that shifts the positions of all points
void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shifted_points,
Matrix d_new_shift, Matrix d_denominator, Matrix d_mean_shift_vector, double **kernel_matrix,
double **original_points, double ***new_shift, double ***mean_shift_vector);
double **original_points, double ***new_shift, double ***mean_shift_vector,
double *w_memcpy_time);
//Function free_device_memory frees device's previously allocated memory
void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator,

Loading…
Cancel
Save