Browse Source

calculate norm - 1st commit

master
anapt 7 years ago
parent
commit
a2f29a6ad2
  1. 72
      mean_shift_cuda/meanshift_gpu_utils.cu
  2. 4
      mean_shift_cuda/meanshift_gpu_utils.h

72
mean_shift_cuda/meanshift_gpu_utils.cu

@ -133,20 +133,23 @@ int meanshift(double **original_points, double ***shifted_points, int deviation)
} }
// calculates norm of the new mean shift vector in GPU using "cuBlas" library function // calculates norm of the new mean shift vector in GPU using "cuBlas" library function
cublasHandle_t handle; // TODO REPLACE WITH KERNEL NORM
cublasStatus_t cublas_status = cublasCreate(&handle); // cublasHandle_t handle;
if (cublas_status != CUBLAS_STATUS_SUCCESS){ // cublasStatus_t cublas_status = cublasCreate(&handle);
exit(cublas_status); // if (cublas_status != CUBLAS_STATUS_SUCCESS){
} // exit(cublas_status);
cublas_status = cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements, // }
1, &current_norm); // cublas_status = cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements,
if (cublas_status != CUBLAS_STATUS_SUCCESS){ // 1, &current_norm);
exit(cublas_status); // if (cublas_status != CUBLAS_STATUS_SUCCESS){
} // exit(cublas_status);
cublas_status = cublasDestroy(handle); // }
if (cublas_status != CUBLAS_STATUS_SUCCESS){ // cublas_status = cublasDestroy(handle);
exit(cublas_status); // if (cublas_status != CUBLAS_STATUS_SUCCESS){
} // exit(cublas_status);
// }
calculate_norm(d_mean_shift_vector, &current_norm);
if (params.verbose){ if (params.verbose){
printf("Recursion n. %d, error\t%f \n", recursion, current_norm); printf("Recursion n. %d, error\t%f \n", recursion, current_norm);
@ -295,9 +298,9 @@ void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator){
} }
void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shifted_points, void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shifted_points,
Matrix d_new_shift, Matrix d_denominator, Matrix d_mean_shift_vector, double **kernel_matrix, Matrix d_new_shift, Matrix d_denominator, Matrix d_mean_shift_vector, double **kernel_matrix,
double **original_points, double ***new_shift, double ***mean_shift_vector, double **original_points, double ***new_shift,
double *w_memcpy_time){ double ***mean_shift_vector, double *w_memcpy_time){
int size; int size;
static bool first_iter = true; static bool first_iter = true;
// gets max block size supported from the device // gets max block size supported from the device
@ -346,6 +349,41 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi
/ 1.0e6 + end_w_time.tv_sec - start_w_time.tv_sec); / 1.0e6 + end_w_time.tv_sec - start_w_time.tv_sec);
} }
// Host-side wrapper that launches the "norm" kernel on d_mean_shift_vector.
//
// Launch layout: dimBlock = (requested_block_size, d_mean_shift_vector.width),
// dimGrid = (ceil(d_mean_shift_vector.height / dimBlock.x), 1).
// requested_block_size starts at maxThreadsPerBlock / width and is decremented by one
// after every failed launch; because it is static, the value reached on one call is
// reused on later calls.
//
// Parameters:
//   d_mean_shift_vector - matrix handed to the kernel (presumably resident in device
//                         memory, judging by the d_ prefix -- confirm with the caller)
//   current_norm        - intended destination of the computed norm; this wrapper itself
//                         never writes through it on the host side (see NOTE below)
//
// Terminates the process if no block size >= 1 can be launched.
void calculate_norm(Matrix d_mean_shift_vector, double *current_norm){
    static bool first_iter = true;
    // gets max block size supported from the device
    static int max_block_size = device_properties.maxThreadsPerBlock;
    static int requested_block_size = (int)(max_block_size / d_mean_shift_vector.width);
    bool block_size_too_big = true;

    dim3 dimBlock;
    dim3 dimGrid;
    do {
        // A block size of 0 would divide by zero in the grid computation below, and a
        // negative one can never launch, so stop instead of retrying forever.
        if (requested_block_size < 1){
            fprintf(stderr, "calculate_norm: no launchable block size for norm kernel\n");
            exit(EXIT_FAILURE);
        }

        dimBlock.x = requested_block_size;
        dimBlock.y = d_mean_shift_vector.width;
        // ceil-div so that every row of the matrix is covered by some block
        dimGrid.x = (d_mean_shift_vector.height + dimBlock.x - 1) / dimBlock.x;
        dimGrid.y = 1;

        // NOTE(review): &current_norm is the address of this function's own parameter,
        // i.e. a host address of type double**. Device code should not be able to
        // dereference it, and nothing here copies a result back into *current_norm.
        // The kernel's signature is not visible from this file section, so the call is
        // left unchanged -- TODO: pass a device buffer and cudaMemcpy the result back.
        norm<<<dimGrid, dimBlock>>>(d_mean_shift_vector, &current_norm);

        // any launch error is treated as "block too big": shrink the block and retry
        if (cudaGetLastError() != cudaSuccess){
            --requested_block_size;
        } else {
            block_size_too_big = false;
            gpuErrchk( cudaDeviceSynchronize() );
        }
    } while(block_size_too_big);

    // report the launch configuration once, on the first call only
    if (first_iter && params.verbose){
        printf("norm_kernel called with:\n");
        printf("dimBlock.x = %d, dimBlock.y = %d\n", dimBlock.x, dimBlock.y);
        printf("dimGrid.x = %d, dimGrid.y = %d\n\n", dimGrid.x, dimGrid.y);
        first_iter = false;
    }
}
void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator, void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator,
Matrix d_shifted_points){ Matrix d_shifted_points){
// frees all memory previously allocated in device // frees all memory previously allocated in device

4
mean_shift_cuda/meanshift_gpu_utils.h

@ -51,6 +51,10 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi
double **original_points, double ***new_shift, double ***mean_shift_vector, double **original_points, double ***new_shift, double ***mean_shift_vector,
double *w_memcpy_time); double *w_memcpy_time);
//Function calculate_norm is a wrapper for the kernel call of the corresponding kernel
//"norm" that calculates the norm of the mean_shift_vector matrix
void calculate_norm(Matrix d_mean_shift_vector, double *current_norm);
//Function free_device_memory frees device's previously allocated memory //Function free_device_memory frees device's previously allocated memory
void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator, void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator,
Matrix d_shifted_points); Matrix d_shifted_points);

Loading…
Cancel
Save