Clean-up, Add comments

8 years ago · 2770b593cb
6 changed files with 85 additions and 65 deletions
--- a/mean_shift_cuda/meanshift.cu
+++ b/mean_shift_cuda/meanshift.cu
@ -8,8 +8,8 @@
 int DEVIATION = 1;
 int NUMBER_OF_POINTS = 600;
 int DIMENSIONS = 2;
-char* POINTS_FILENAME = "../data/X.bin";
+const char *POINTS_FILENAME = "../data/X.bin";
-char* LABELS_FILENAME = "../data/L.bin";
+const char *LABELS_FILENAME = "../data/L.bin";
 parameters params;
 struct timeval startwtime, endwtime;
--- a/mean_shift_cuda/meanshift_gpu_utils.cu
+++ b/mean_shift_cuda/meanshift_gpu_utils.cu
@ -15,8 +15,8 @@ cudaDeviceProp device_properties;
 struct timeval start, end;
 double seq;
-//Based on https://stackoverflow.com/a/28113186
+//Based on:
-//Pio psagmeno link https://www.cs.virginia.edu/~csadmin/wiki/index.php/CUDA_Support/Choosing_a_GPU
+//          https://www.cs.virginia.edu/~csadmin/wiki/index.php/CUDA_Support/Choosing_a_GPU
 void set_GPU(){
    int devices_count = 0, max_multiprocessors = 0, max_device = 0;
@ -56,7 +56,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
    int size = 0;
    static int iteration = 0;
    static double **kernel_matrix, **mean_shift_vector;
-    double **new_shift;
+    double **new_shift, current_norm = 0;
    // device variables
    static Matrix d_original_points, d_shifted_points, d_kernel_matrix, d_denominator,
@ -78,7 +78,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
            }
        }
-        // allocates memory for other arrays needed
+        // allocates memory for kernel_matrix
        kernel_matrix = alloc_double(NUMBER_OF_POINTS, NUMBER_OF_POINTS);
        // tic
@ -91,7 +91,6 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
        gettimeofday (&end, NULL);
        seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
 //        printf("%s wall clock time = %f\n","Device memory allocation", seq);
        // to create output data file
        printf("%f ", seq);
@ -105,8 +104,6 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
    // calculates denominator
    calculate_denominator(d_kernel_matrix, d_denominator);
    // creates new y vector
    // allocates memory in every recursion
    new_shift = alloc_double(NUMBER_OF_POINTS, DIMENSIONS);
@ -130,11 +127,20 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
    }
    // calculates norm of the new mean shift vector in GPU using "cuBlas" library function
    double current_norm = 0;
    cublasHandle_t handle;
-    cublasCreate(&handle);
+    cublasStatus_t cublas_status = cublasCreate(&handle);
-    cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements, 1, &current_norm);
+    if (cublas_status != CUBLAS_STATUS_SUCCESS){
-    cublasDestroy(handle);
+        exit(cublas_status);
    }
    cublas_status = cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements,
        1, &current_norm);
    if (cublas_status != CUBLAS_STATUS_SUCCESS){
        exit(cublas_status);
    }
    cublas_status = cublasDestroy(handle);
    if (cublas_status != CUBLAS_STATUS_SUCCESS){
        exit(cublas_status);
    }
    if (params.verbose){
        printf("Iteration n. %d, error\t%f \n", iteration, current_norm);
@ -244,7 +250,6 @@ void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points,
    gettimeofday (&end, NULL);
    seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
 //    printf("%s wall clock time = %f\n","Copying from device to host", seq);
    // to create output data file
        printf("%f ", seq);
@ -331,7 +336,6 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi
    gettimeofday (&end, NULL);
    seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
 //    printf("%s wall clock time = %f\n","Copying from device to host", seq);
    // to create output data file
        printf("%f ", seq);
--- a/mean_shift_cuda/meanshift_gpu_utils.h
+++ b/mean_shift_cuda/meanshift_gpu_utils.h
@ -4,7 +4,7 @@
 #include "meanshift_kernels.h"
 //GPU error check snippet taken from:
-//https://stackoverflow.com/a/14038590
+//              https://stackoverflow.com/a/14038590
 #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
 inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true){
   if (code != cudaSuccess){
@ -17,37 +17,42 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
 extern int DEVIATION;
 extern int NUMBER_OF_POINTS;
 extern int DIMENSIONS;
-extern char* POINTS_FILENAME;
+extern const char* POINTS_FILENAME;
-extern char* LABELS_FILENAME;
+extern const char* LABELS_FILENAME;
-extern parameters params;
+extern Parameters params;
 extern cudaDeviceProp device_properties;
 //Function set_GPU parses available GPU devices, selects the one with the most multi-processors for
 //usage and stores its properties in global struct device_properties
 void set_GPU();
-//Function meanshift recursively shifts original points according to th
+//Function meanshift recursively shifts original points according to the mean-shift algorithm saving
-//mean-shift algorithm saving the result to shiftedPoints. Struct opt has user
+//the result to shiftedPoints. Struct opt has user options, h is the desirable deviation
 //options, h is the desirable deviation.
 int meanshift(double **original_points, double ***shifted_points, int h
-    , parameters *opt);
+    , Parameters *opt);
 //Function init_device_memory allocates memory for necessary arrays in the device
 void init_device_memory(double **original_points, double **shifted_points,
-    Matrix *d_original_points, Matrix *d_shifted_points,
+    Matrix *d_original_points, Matrix *d_shifted_points, Matrix *d_kernel_matrix,
-    Matrix *d_kernel_matrix, Matrix *d_denominator, Matrix *d_new_shift);
+    Matrix *d_denominator, Matrix *d_new_shift);
 //Function calculate_kernel_matrix is a wrapper for the kernel call of the corresponding kernel
 //"calculate_kernel_matrix_kernel" that calculates the kernel matrix
 void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points,
    Matrix d_kernel_matrix, double deviation, double ***kernel_matrix);
-//Function multiply allocates memory in GPU, sends the data and calls the 
+//Function calculate_denominator is a wrapper for the kernel call of the corresponding kernel
-//multiply kernel function.
+//"calculate_denominator_kernel" that calculates the denominator of shifted points fraction
 void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator);
 //Function shift_points is a wrapper for the kernel call of the corresponding kernel
 //"shift_points_kernel" that shifts the positions of all points
 void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shifted_points,
    Matrix d_new_shift, Matrix d_denominator, Matrix d_mean_shift_vector, double **kernel_matrix,
    double **original_points, double ***new_shift, double ***mean_shift_vector);
 //Function free_device_memory frees device's previously allocated memory
 void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator,
    Matrix d_new_shift);
 //Function calculate_denominator allocates memory in GPU, sends the data and calls the
 //denominator kernel function.
 void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator);
 #endif //SERIAL_GPU_UTILS_H
--- a/mean_shift_cuda/meanshift_kernels.cu
+++ b/mean_shift_cuda/meanshift_kernels.cu
@ -16,7 +16,8 @@ __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix ori
    // calculate distance
    double sum = 0, dif;
    for (int i=0; i<dimensions; i++){
-        dif = shifted_points.elements[row * dimensions + i] - original_points.elements[col * dimensions + i];
+        dif = shifted_points.elements[row * dimensions + i]
            - original_points.elements[col * dimensions + i];
        sum += dif * dif;
    }
    double distance = sqrt(sum);
@ -34,8 +35,25 @@ __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix ori
    }
 }
-__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix, Matrix shifted_points,
+__global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix){
-    Matrix new_shift, Matrix denominator, Matrix mean_shift_vector){
+    // each thread computes one element of denominator
    // by accumulating results into cell_value
    double cell_value = 0;
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    // performs calculations only if thread's indexes are within matrix bounds
    if (row >= denominator.height){
        return;
    }
    for (int column = 0; column < kernel_matrix.width; ++column){
         cell_value += kernel_matrix.elements[row * kernel_matrix.width + column];
    }
    denominator.elements[row] = cell_value;
 }
 __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix,
    Matrix shifted_points, Matrix new_shift, Matrix denominator, Matrix mean_shift_vector){
    // each thread computes one element of new_shift
    // by accumulating results into cell_value
    double cell_value = 0;
@ -61,21 +79,4 @@ __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix
    mean_shift_vector.elements[row * new_shift.width + col] =
        new_shift.elements[row * new_shift.width + col] -
        shifted_points.elements[row * new_shift.width + col];
 }
 __global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix){
    // one thread per row: the thread sums its row of kernel_matrix and stores
    // the total in the matching entry of denominator
    const int r = blockIdx.x * blockDim.x + threadIdx.x;
    // only threads whose row index falls inside denominator do any work
    if (r < denominator.height){
        const int w = kernel_matrix.width;
        // start of row r in the flat element array
        const double *row_begin = kernel_matrix.elements + r * w;
        double acc = 0;
        for (int c = 0; c < w; ++c){
            acc += row_begin[c];
        }
        denominator.elements[r] = acc;
    }
 }
--- a/mean_shift_cuda/meanshift_kernels.h
+++ b/mean_shift_cuda/meanshift_kernels.h
@ -1,19 +1,26 @@
 #ifndef SERIAL_KERNELS_H    /*    Include guard    */
 #define SERIAL_KERNELS_H
 /*      Structures     */
 //Matrix describes a matrix stored as a flat array of doubles; the kernels index it
 //row by row, i.e. element (row, col) is elements[row * width + col]
 typedef struct {
    int width;          // number of columns (used as the row stride when indexing)
    int height;         // number of rows (kernels bound their row index by it)
    double *elements;   // flat element storage; presumably device memory when passed to a kernel - confirm
 } Matrix;
 //Kernel calculate_kernel_matrix_kernel calculates the current kernel matrix
 __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix original_points,
    double deviation, Matrix kernel_matrix);
-//Function multiply_kernel calculates the product of matrices 1 and 2 into output.
+//Kernel denominator_kernel calculates the sum in the denominator of the fraction used to find new
-__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix, Matrix shifted_points,
+//(shifted) positions of the points
    Matrix new_shift, Matrix denominator, Matrix mean_shift_vector);
 __global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix);
 //Kernel shift_points_kernel shifts the positions of all points and calculates the new mean shift
 //vector according to the new point array
 __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix,
    Matrix shifted_points, Matrix new_shift, Matrix denominator, Matrix mean_shift_vector);
 #endif //SERIAL_KERNELS_H
--- a/mean_shift_cuda/meanshift_utils.h
+++ b/mean_shift_cuda/meanshift_utils.h
@ -3,30 +3,33 @@
 #include <stdbool.h>
-/*        Structs       */
+/*      Structures     */
 //Parameters is used to store session specific variables in an orderly way
 typedef struct parameters {
    double epsilon;
    bool verbose;
    bool display;
-} parameters;
+} Parameters;
-//Function get_args parses command line arguments.
+//Function get_args parses command line arguments
-void get_args(int argc, char **argv, parameters *params);
+void get_args(int argc, char **argv, Parameters *params);
-//Function init reads the dataset and label arrays from the corresponding files.
+//Function init sets up the GPU for later use, gets its properties and reads the dataset and label
 //arrays from the corresponding files
 void init(double ***vectors, char **labels);
-//Function alloc_double allocates rows*cols bytes of continuous memory.
+//Function alloc_double allocates rows*cols bytes of contiguous memory
 double **alloc_double(int rows, int cols);
-//Function duplicate copies the values of source array to dest array.
+//Function duplicate copies the values of source array to dest array
 void duplicate(double **source, int rows, int cols, double ***dest);
-//Function print_matrix prints array of dimensions rowsXcols to the console.
+//Function print_matrix prints array of dimensions <rows X cols> to the console
 void print_matrix(double **array, int rows, int cols);
-//Function save_matrix prints matrix in a csv file with path/filename
+//Function save_matrix stores matrix in a csv file with path/filename "../output/output_iteration".
-//"output/output_iteration". If a file already exists new lines are concatenated.
+//If a file already exists, new lines are appended to it
 void save_matrix(double **matrix, int iteration);
 #endif //SERIAL_UTILS_H