Browse Source

Clean-up, Add comments

master
Apostolos Fanakis 7 years ago
parent
commit
2770b593cb
  1. 4
      mean_shift_cuda/meanshift.cu
  2. 30
      mean_shift_cuda/meanshift_gpu_utils.cu
  3. 37
      mean_shift_cuda/meanshift_gpu_utils.h
  4. 41
      mean_shift_cuda/meanshift_kernels.cu
  5. 15
      mean_shift_cuda/meanshift_kernels.h
  6. 23
      mean_shift_cuda/meanshift_utils.h

4
mean_shift_cuda/meanshift.cu

@ -8,8 +8,8 @@
int DEVIATION = 1; int DEVIATION = 1;
int NUMBER_OF_POINTS = 600; int NUMBER_OF_POINTS = 600;
int DIMENSIONS = 2; int DIMENSIONS = 2;
char* POINTS_FILENAME = "../data/X.bin"; const char *POINTS_FILENAME = "../data/X.bin";
char* LABELS_FILENAME = "../data/L.bin"; const char *LABELS_FILENAME = "../data/L.bin";
parameters params; parameters params;
struct timeval startwtime, endwtime; struct timeval startwtime, endwtime;

30
mean_shift_cuda/meanshift_gpu_utils.cu

@ -15,8 +15,8 @@ cudaDeviceProp device_properties;
struct timeval start, end; struct timeval start, end;
double seq; double seq;
//Based on https://stackoverflow.com/a/28113186 //Based on:
//Pio psagmeno link https://www.cs.virginia.edu/~csadmin/wiki/index.php/CUDA_Support/Choosing_a_GPU // https://www.cs.virginia.edu/~csadmin/wiki/index.php/CUDA_Support/Choosing_a_GPU
void set_GPU(){ void set_GPU(){
int devices_count = 0, max_multiprocessors = 0, max_device = 0; int devices_count = 0, max_multiprocessors = 0, max_device = 0;
@ -56,7 +56,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
int size = 0; int size = 0;
static int iteration = 0; static int iteration = 0;
static double **kernel_matrix, **mean_shift_vector; static double **kernel_matrix, **mean_shift_vector;
double **new_shift; double **new_shift, current_norm = 0;
// device variables // device variables
static Matrix d_original_points, d_shifted_points, d_kernel_matrix, d_denominator, static Matrix d_original_points, d_shifted_points, d_kernel_matrix, d_denominator,
@ -78,7 +78,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
} }
} }
// allocates memory for other arrays needed // allocates memory for kernel_matrix
kernel_matrix = alloc_double(NUMBER_OF_POINTS, NUMBER_OF_POINTS); kernel_matrix = alloc_double(NUMBER_OF_POINTS, NUMBER_OF_POINTS);
// tic // tic
@ -91,7 +91,6 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
gettimeofday (&end, NULL); gettimeofday (&end, NULL);
seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec); seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
// printf("%s wall clock time = %f\n","Device memory allocation", seq); // printf("%s wall clock time = %f\n","Device memory allocation", seq);
// to create output data file // to create output data file
printf("%f ", seq); printf("%f ", seq);
@ -105,8 +104,6 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
// calculates denominator // calculates denominator
calculate_denominator(d_kernel_matrix, d_denominator); calculate_denominator(d_kernel_matrix, d_denominator);
// creates new y vector // creates new y vector
// allocates memory in every recursion // allocates memory in every recursion
new_shift = alloc_double(NUMBER_OF_POINTS, DIMENSIONS); new_shift = alloc_double(NUMBER_OF_POINTS, DIMENSIONS);
@ -130,11 +127,20 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
} }
// calculates norm of the new mean shift vector in GPU using "cuBlas" library function // calculates norm of the new mean shift vector in GPU using "cuBlas" library function
double current_norm = 0;
cublasHandle_t handle; cublasHandle_t handle;
cublasCreate(&handle); cublasStatus_t cublas_status = cublasCreate(&handle);
cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements, 1, &current_norm); if (cublas_status != CUBLAS_STATUS_SUCCESS){
cublasDestroy(handle); exit(cublas_status);
}
cublas_status = cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements,
1, &current_norm);
if (cublas_status != CUBLAS_STATUS_SUCCESS){
exit(cublas_status);
}
cublas_status = cublasDestroy(handle);
if (cublas_status != CUBLAS_STATUS_SUCCESS){
exit(cublas_status);
}
if (params.verbose){ if (params.verbose){
printf("Iteration n. %d, error\t%f \n", iteration, current_norm); printf("Iteration n. %d, error\t%f \n", iteration, current_norm);
@ -244,7 +250,6 @@ void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points,
gettimeofday (&end, NULL); gettimeofday (&end, NULL);
seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec); seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
// printf("%s wall clock time = %f\n","Copying from device to host", seq); // printf("%s wall clock time = %f\n","Copying from device to host", seq);
// to create output data file // to create output data file
printf("%f ", seq); printf("%f ", seq);
@ -331,7 +336,6 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi
gettimeofday (&end, NULL); gettimeofday (&end, NULL);
seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec); seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
// printf("%s wall clock time = %f\n","Copying from device to host", seq); // printf("%s wall clock time = %f\n","Copying from device to host", seq);
// to create output data file // to create output data file
printf("%f ", seq); printf("%f ", seq);

37
mean_shift_cuda/meanshift_gpu_utils.h

@ -4,7 +4,7 @@
#include "meanshift_kernels.h" #include "meanshift_kernels.h"
//GPU error check snippet taken from: //GPU error check snippet taken from:
//https://stackoverflow.com/a/14038590 // https://stackoverflow.com/a/14038590
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true){ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true){
if (code != cudaSuccess){ if (code != cudaSuccess){
@ -17,37 +17,42 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
extern int DEVIATION; extern int DEVIATION;
extern int NUMBER_OF_POINTS; extern int NUMBER_OF_POINTS;
extern int DIMENSIONS; extern int DIMENSIONS;
extern char* POINTS_FILENAME; extern const char* POINTS_FILENAME;
extern char* LABELS_FILENAME; extern const char* LABELS_FILENAME;
extern parameters params; extern Parameters params;
extern cudaDeviceProp device_properties; extern cudaDeviceProp device_properties;
//Function set_GPU parses available GPU devices, selects the one with the most multi-processors for
//usage and stores its properties in global struct device_properties
void set_GPU(); void set_GPU();
//Function meanshift recursively shifts original points according to th //Function meanshift recursively shifts original points according to the mean-shift algorithm saving
//mean-shift algorithm saving the result to shiftedPoints. Struct opt has user //the result to shifted_points. Struct opt has user options, h is the desired deviation
//options, h is the desirable deviation.
int meanshift(double **original_points, double ***shifted_points, int h int meanshift(double **original_points, double ***shifted_points, int h
, parameters *opt); , Parameters *opt);
//Function init_device_memory allocates memory for necessary arrays in the device
void init_device_memory(double **original_points, double **shifted_points, void init_device_memory(double **original_points, double **shifted_points,
Matrix *d_original_points, Matrix *d_shifted_points, Matrix *d_original_points, Matrix *d_shifted_points, Matrix *d_kernel_matrix,
Matrix *d_kernel_matrix, Matrix *d_denominator, Matrix *d_new_shift); Matrix *d_denominator, Matrix *d_new_shift);
//Function calculate_kernel_matrix is a wrapper for the kernel call of the corresponding kernel
//"calculate_kernel_matrix_kernel" that calculates the kernel matrix
void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points, void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points,
Matrix d_kernel_matrix, double deviation, double ***kernel_matrix); Matrix d_kernel_matrix, double deviation, double ***kernel_matrix);
//Function multiply allocates memory in GPU, sends the data and calls the //Function calculate_denominator is a wrapper for the kernel call of the corresponding kernel
//multiply kernel function. //"calculate_denominator_kernel" that calculates the denominator of shifted points fraction
void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator);
//Function shift_points is a wrapper for the kernel call of the corresponding kernel
//"shift_points_kernel" that shifts the positions of all points
void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shifted_points, void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shifted_points,
Matrix d_new_shift, Matrix d_denominator, Matrix d_mean_shift_vector, double **kernel_matrix, Matrix d_new_shift, Matrix d_denominator, Matrix d_mean_shift_vector, double **kernel_matrix,
double **original_points, double ***new_shift, double ***mean_shift_vector); double **original_points, double ***new_shift, double ***mean_shift_vector);
//Function free_device_memory frees device's previously allocated memory
void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator, void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator,
Matrix d_new_shift); Matrix d_new_shift);
//Function calculate_denominator allocates memory in GPU, sends the data and calls the
//denominator kernel function.
void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator);
#endif //SERIAL_GPU_UTILS_H #endif //SERIAL_GPU_UTILS_H

41
mean_shift_cuda/meanshift_kernels.cu

@ -16,7 +16,8 @@ __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix ori
// calculate distance // calculate distance
double sum = 0, dif; double sum = 0, dif;
for (int i=0; i<dimensions; i++){ for (int i=0; i<dimensions; i++){
dif = shifted_points.elements[row * dimensions + i] - original_points.elements[col * dimensions + i]; dif = shifted_points.elements[row * dimensions + i]
- original_points.elements[col * dimensions + i];
sum += dif * dif; sum += dif * dif;
} }
double distance = sqrt(sum); double distance = sqrt(sum);
@ -34,8 +35,25 @@ __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix ori
} }
} }
__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix, Matrix shifted_points, __global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix){
Matrix new_shift, Matrix denominator, Matrix mean_shift_vector){ // each thread computes one element of denominator
// by accumulating results into cell_value
double cell_value = 0;
int row = blockIdx.x * blockDim.x + threadIdx.x;
// performs calculations only if thread's indexes are within matrix bounds
if (row >= denominator.height){
return;
}
for (int column = 0; column < kernel_matrix.width; ++column){
cell_value += kernel_matrix.elements[row * kernel_matrix.width + column];
}
denominator.elements[row] = cell_value;
}
__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix,
Matrix shifted_points, Matrix new_shift, Matrix denominator, Matrix mean_shift_vector){
// each thread computes one element of new_shift // each thread computes one element of new_shift
// by accumulating results into cell_value // by accumulating results into cell_value
double cell_value = 0; double cell_value = 0;
@ -62,20 +80,3 @@ __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix
new_shift.elements[row * new_shift.width + col] - new_shift.elements[row * new_shift.width + col] -
shifted_points.elements[row * new_shift.width + col]; shifted_points.elements[row * new_shift.width + col];
} }
// Kernel denominator_kernel adds up the elements of one row of kernel_matrix
// per thread and stores that total in the matching element of denominator.
// The row a thread works on is its global index along x; kernel_matrix.elements
// is read as a flat array indexed by row * width + column.
__global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix){
    // global x index of this thread selects the row it is responsible for
    const int row = blockIdx.x * blockDim.x + threadIdx.x;

    // threads whose index falls outside denominator's height do no work
    if (row < denominator.height){
        const int row_width = kernel_matrix.width;

        // accumulate the row from its first to its last column
        double row_sum = 0;
        for (int i = 0; i < row_width; ++i){
            row_sum += kernel_matrix.elements[row * row_width + i];
        }

        denominator.elements[row] = row_sum;
    }
}

15
mean_shift_cuda/meanshift_kernels.h

@ -1,19 +1,26 @@
#ifndef SERIAL_KERNELS_H /* Include guard */ #ifndef SERIAL_KERNELS_H /* Include guard */
#define SERIAL_KERNELS_H #define SERIAL_KERNELS_H
/* Structures */
//Matrix is used to describe matrices
typedef struct { typedef struct {
int width; int width;
int height; int height;
double *elements; double *elements;
} Matrix; } Matrix;
//Kernel calculate_kernel_matrix_kernel calculates the current kernel matrix
__global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix original_points, __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix original_points,
double deviation, Matrix kernel_matrix); double deviation, Matrix kernel_matrix);
//Function multiply_kernel calculates the product of matrices 1 and 2 into output. //Kernel denominator_kernel calculates the sum in the denominator of the fraction used to find new
__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix, Matrix shifted_points, //(shifted) positions of the points
Matrix new_shift, Matrix denominator, Matrix mean_shift_vector);
__global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix); __global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix);
//Kernel shift_points_kernel shifts the positions of all points and calculates the new mean shift
//vector according to the new point array
__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix,
Matrix shifted_points, Matrix new_shift, Matrix denominator, Matrix mean_shift_vector);
#endif //SERIAL_KERNELS_H #endif //SERIAL_KERNELS_H

23
mean_shift_cuda/meanshift_utils.h

@ -3,30 +3,33 @@
#include <stdbool.h> #include <stdbool.h>
/* Structs */ /* Structures */
//Parameters is used to store session specific variables in an orderly way
typedef struct parameters { typedef struct parameters {
double epsilon; double epsilon;
bool verbose; bool verbose;
bool display; bool display;
} parameters; } Parameters;
//Function get_args parses command line arguments. //Function get_args parses command line arguments
void get_args(int argc, char **argv, parameters *params); void get_args(int argc, char **argv, Parameters *params);
//Function init reads the dataset and label arrays from the corresponding files. //Function init sets up the GPU for later use, gets its properties and reads the dataset and label
//arrays from the corresponding files
void init(double ***vectors, char **labels); void init(double ***vectors, char **labels);
//Function alloc_double allocates rows*cols bytes of continuous memory. //Function alloc_double allocates rows*cols bytes of contiguous memory
double **alloc_double(int rows, int cols); double **alloc_double(int rows, int cols);
//Function duplicate copies the values of source array to dest array. //Function duplicate copies the values of source array to dest array
void duplicate(double **source, int rows, int cols, double ***dest); void duplicate(double **source, int rows, int cols, double ***dest);
//Function print_matrix prints array of dimensions rowsXcols to the console. //Function print_matrix prints array of dimensions <rows X cols> to the console
void print_matrix(double **array, int rows, int cols); void print_matrix(double **array, int rows, int cols);
//Function save_matrix prints matrix in a csv file with path/filename //Function save_matrix stores matrix in a csv file with path/filename "../output/output_iteration".
//"output/output_iteration". If a file already exists new lines are concatenated. //If a file already exists new lines are concatenated
void save_matrix(double **matrix, int iteration); void save_matrix(double **matrix, int iteration);
#endif //SERIAL_UTILS_H #endif //SERIAL_UTILS_H
Loading…
Cancel
Save