diff --git a/mean_shift_cuda/meanshift.cu b/mean_shift_cuda/meanshift.cu index f5324dc..2ebae16 100644 --- a/mean_shift_cuda/meanshift.cu +++ b/mean_shift_cuda/meanshift.cu @@ -8,8 +8,8 @@ int DEVIATION = 1; int NUMBER_OF_POINTS = 600; int DIMENSIONS = 2; -char* POINTS_FILENAME = "../data/X.bin"; -char* LABELS_FILENAME = "../data/L.bin"; +const char *POINTS_FILENAME = "../data/X.bin"; +const char *LABELS_FILENAME = "../data/L.bin"; parameters params; struct timeval startwtime, endwtime; diff --git a/mean_shift_cuda/meanshift_gpu_utils.cu b/mean_shift_cuda/meanshift_gpu_utils.cu index ba886ee..c92e78f 100644 --- a/mean_shift_cuda/meanshift_gpu_utils.cu +++ b/mean_shift_cuda/meanshift_gpu_utils.cu @@ -15,8 +15,8 @@ cudaDeviceProp device_properties; struct timeval start, end; double seq; -//Based on https://stackoverflow.com/a/28113186 -//Pio psagmeno link https://www.cs.virginia.edu/~csadmin/wiki/index.php/CUDA_Support/Choosing_a_GPU +//Based on: +// https://www.cs.virginia.edu/~csadmin/wiki/index.php/CUDA_Support/Choosing_a_GPU void set_GPU(){ int devices_count = 0, max_multiprocessors = 0, max_device = 0; @@ -56,7 +56,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation int size = 0; static int iteration = 0; static double **kernel_matrix, **mean_shift_vector; - double **new_shift; + double **new_shift, current_norm = 0; // device variables static Matrix d_original_points, d_shifted_points, d_kernel_matrix, d_denominator, @@ -78,7 +78,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation } } - // allocates memory for other arrays needed + // allocates memory for kernel_matrix kernel_matrix = alloc_double(NUMBER_OF_POINTS, NUMBER_OF_POINTS); // tic @@ -91,7 +91,6 @@ int meanshift(double **original_points, double ***shifted_points, int deviation gettimeofday (&end, NULL); seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec); - // printf("%s wall clock time = %f\n","Device memory 
allocation", seq); // to create output data file printf("%f ", seq); @@ -105,8 +104,6 @@ int meanshift(double **original_points, double ***shifted_points, int deviation // calculates denominator calculate_denominator(d_kernel_matrix, d_denominator); - - // creates new y vector // allocates memory in every recursion new_shift = alloc_double(NUMBER_OF_POINTS, DIMENSIONS); @@ -130,11 +127,20 @@ int meanshift(double **original_points, double ***shifted_points, int deviation } // calculates norm of the new mean shift vector in GPU using "cuBlas" library function - double current_norm = 0; cublasHandle_t handle; - cublasCreate(&handle); - cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements, 1, &current_norm); - cublasDestroy(handle); + cublasStatus_t cublas_status = cublasCreate(&handle); + if (cublas_status != CUBLAS_STATUS_SUCCESS){ + exit(cublas_status); + } + cublas_status = cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements, + 1, &current_norm); + if (cublas_status != CUBLAS_STATUS_SUCCESS){ + exit(cublas_status); + } + cublas_status = cublasDestroy(handle); + if (cublas_status != CUBLAS_STATUS_SUCCESS){ + exit(cublas_status); + } if (params.verbose){ printf("Iteration n. 
%d, error\t%f \n", iteration, current_norm); @@ -244,7 +250,6 @@ void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points, gettimeofday (&end, NULL); seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec); - // printf("%s wall clock time = %f\n","Copying from device to host", seq); // to create output data file printf("%f ", seq); @@ -331,7 +336,6 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi gettimeofday (&end, NULL); seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec); - // printf("%s wall clock time = %f\n","Copying from device to host", seq); // to create output data file printf("%f ", seq); diff --git a/mean_shift_cuda/meanshift_gpu_utils.h b/mean_shift_cuda/meanshift_gpu_utils.h index 48d466c..1c2cdf5 100644 --- a/mean_shift_cuda/meanshift_gpu_utils.h +++ b/mean_shift_cuda/meanshift_gpu_utils.h @@ -4,7 +4,7 @@ #include "meanshift_kernels.h" //GPU error check snippet taken from: -//https://stackoverflow.com/a/14038590 +// https://stackoverflow.com/a/14038590 #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true){ if (code != cudaSuccess){ @@ -17,37 +17,42 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t extern int DEVIATION; extern int NUMBER_OF_POINTS; extern int DIMENSIONS; -extern char* POINTS_FILENAME; -extern char* LABELS_FILENAME; -extern parameters params; +extern const char* POINTS_FILENAME; +extern const char* LABELS_FILENAME; +extern Parameters params; extern cudaDeviceProp device_properties; +//Function set_GPU parses available GPU devices, selects the one with the most multi-processors for +//usage and stores its properties in global struct device_properties void set_GPU(); -//Function meanshift recursively shifts original points according to th -//mean-shift algorithm saving the result to shiftedPoints. 
Struct opt has user -//options, h is the desirable deviation. +//Function meanshift recursively shifts original points according to the mean-shift algorithm saving +//the result to shiftedPoints. Struct opt has user options, h is the desirable deviation int meanshift(double **original_points, double ***shifted_points, int h - , parameters *opt); + , Parameters *opt); +//Function init_device_memory allocates memory for necessary arrays in the device void init_device_memory(double **original_points, double **shifted_points, - Matrix *d_original_points, Matrix *d_shifted_points, - Matrix *d_kernel_matrix, Matrix *d_denominator, Matrix *d_new_shift); + Matrix *d_original_points, Matrix *d_shifted_points, Matrix *d_kernel_matrix, + Matrix *d_denominator, Matrix *d_new_shift); +//Function calculate_kernel_matrix is a wrapper for the kernel call of the corresponding kernel +//"calculate_kernel_matrix_kernel" that calculates the kernel matrix void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points, Matrix d_kernel_matrix, double deviation, double ***kernel_matrix); -//Function multiply allocates memory in GPU, sends the data and calls the -//multiply kernel function. 
+//Function calculate_denominator is a wrapper for the kernel call of the corresponding kernel +//"calculate_denominator_kernel" that calculates the denominator of shifted points fraction +void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator); + +//Function shift_points is a wrapper for the kernel call of the corresponding kernel +//"shift_points_kernel" that shifts the positions of all points void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shifted_points, Matrix d_new_shift, Matrix d_denominator, Matrix d_mean_shift_vector, double **kernel_matrix, double **original_points, double ***new_shift, double ***mean_shift_vector); +//Function free_device_memory frees device's previously allocated memory void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator, Matrix d_new_shift); -//Function calculate_denominator allocates memory in GPU, sends the data and calls the -//denominator kernel function. -void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator); - #endif //SERIAL_GPU_UTILS_H \ No newline at end of file diff --git a/mean_shift_cuda/meanshift_kernels.cu b/mean_shift_cuda/meanshift_kernels.cu index 04ff883..a20055d 100644 --- a/mean_shift_cuda/meanshift_kernels.cu +++ b/mean_shift_cuda/meanshift_kernels.cu @@ -16,7 +16,8 @@ __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix ori // calculate distance double sum = 0, dif; for (int i=0; i= denominator.height){ + return; + } + + for (int column = 0; column < kernel_matrix.width; ++column){ + cell_value += kernel_matrix.elements[row * kernel_matrix.width + column]; + } + denominator.elements[row] = cell_value; +} + +__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix, + Matrix shifted_points, Matrix new_shift, Matrix denominator, Matrix mean_shift_vector){ // each thread computes one element of new_shift // by accumulating results into cell_value double cell_value 
= 0; @@ -61,21 +79,4 @@ __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix mean_shift_vector.elements[row * new_shift.width + col] = new_shift.elements[row * new_shift.width + col] - shifted_points.elements[row * new_shift.width + col]; -} - -__global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix){ - // each thread computes one element of denominator_kernel - // by accumulating results into cell_value - double cell_value = 0; - int row = blockIdx.x * blockDim.x + threadIdx.x; - - // performs calculations only if thread's indexes are within matrix bounds - if (row >= denominator.height){ - return; - } - - for (int column = 0; column < kernel_matrix.width; ++column){ - cell_value += kernel_matrix.elements[row * kernel_matrix.width + column]; - } - denominator.elements[row] = cell_value; } \ No newline at end of file diff --git a/mean_shift_cuda/meanshift_kernels.h b/mean_shift_cuda/meanshift_kernels.h index d33641e..0ff8070 100644 --- a/mean_shift_cuda/meanshift_kernels.h +++ b/mean_shift_cuda/meanshift_kernels.h @@ -1,19 +1,26 @@ #ifndef SERIAL_KERNELS_H /* Include guard */ #define SERIAL_KERNELS_H +/* Structures */ + +//Matrix is used to describe matrices typedef struct { int width; int height; double *elements; } Matrix; +//Kernel calculate_kernel_matrix_kernel calculates the current kernel matrix __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix original_points, double deviation, Matrix kernel_matrix); -//Function multiply_kernel calculates the product of matrices 1 and 2 into output. 
-__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix, Matrix shifted_points, - Matrix new_shift, Matrix denominator, Matrix mean_shift_vector); - +//Kernel denominator_kernel calculates the sum in the denominator of the fraction used to find new +//(shifted) positions of the points __global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix); +//Kernel shift_points_kernel shifts the positions of all points and calculates the new mean shift +//vector according to the new point array +__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix, + Matrix shifted_points, Matrix new_shift, Matrix denominator, Matrix mean_shift_vector); + #endif //SERIAL_KERNELS_H \ No newline at end of file diff --git a/mean_shift_cuda/meanshift_utils.h b/mean_shift_cuda/meanshift_utils.h index 7c822a2..3be505a 100644 --- a/mean_shift_cuda/meanshift_utils.h +++ b/mean_shift_cuda/meanshift_utils.h @@ -3,30 +3,33 @@ #include -/* Structs */ +/* Structures */ + +//Parameters is used to store session specific variables in an orderly way typedef struct parameters { double epsilon; bool verbose; bool display; -} parameters; +} Parameters; -//Function get_args parses command line arguments. -void get_args(int argc, char **argv, parameters *params); +//Function get_args parses command line arguments +void get_args(int argc, char **argv, Parameters *params); -//Function init reads the dataset and label arrays from the corresponding files. +//Function init sets up the GPU for later use, gets its properties and reads the dataset and label +//arrays from the corresponding files void init(double ***vectors, char **labels); -//Function alloc_double allocates rows*cols bytes of continuous memory. +//Function alloc_double allocates rows*cols bytes of continuous memory double **alloc_double(int rows, int cols); -//Function duplicate copies the values of source array to dest array. 
+//Function duplicate copies the values of source array to dest array void duplicate(double **source, int rows, int cols, double ***dest); -//Function print_matrix prints array of dimensions rowsXcols to the console. +//Function print_matrix prints array of dimensions to the console void print_matrix(double **array, int rows, int cols); -//Function save_matrix prints matrix in a csv file with path/filename -//"output/output_iteration". If a file already exists new lines are concatenated. +//Function save_matrix stores matrix in a csv file with path/filename "../output/output_iteration". +//If a file already exists new lines are concatenated void save_matrix(double **matrix, int iteration); #endif //SERIAL_UTILS_H \ No newline at end of file