diff --git a/mean_shift_cuda/meanshift_gpu_utils.cu b/mean_shift_cuda/meanshift_gpu_utils.cu index f83b90a..c4061d0 100644 --- a/mean_shift_cuda/meanshift_gpu_utils.cu +++ b/mean_shift_cuda/meanshift_gpu_utils.cu @@ -133,22 +133,20 @@ int meanshift(double **original_points, double ***shifted_points, int deviation) } // calculates norm of the new mean shift vector in GPU using "cuBlas" library function - // TODO REPLACE WITH KERNEL NORM -// cublasHandle_t handle; -// cublasStatus_t cublas_status = cublasCreate(&handle); -// if (cublas_status != CUBLAS_STATUS_SUCCESS){ -// exit(cublas_status); -// } -// cublas_status = cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements, -// 1, ¤t_norm); -// if (cublas_status != CUBLAS_STATUS_SUCCESS){ -// exit(cublas_status); -// } -// cublas_status = cublasDestroy(handle); -// if (cublas_status != CUBLAS_STATUS_SUCCESS){ -// exit(cublas_status); -// } - calculate_norm(d_mean_shift_vector, ¤t_norm); + cublasHandle_t handle; + cublasStatus_t cublas_status = cublasCreate(&handle); + if (cublas_status != CUBLAS_STATUS_SUCCESS){ + exit(cublas_status); + } + cublas_status = cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements, + 1, ¤t_norm); + if (cublas_status != CUBLAS_STATUS_SUCCESS){ + exit(cublas_status); + } + cublas_status = cublasDestroy(handle); + if (cublas_status != CUBLAS_STATUS_SUCCESS){ + exit(cublas_status); + } if (params.verbose){ @@ -349,41 +347,6 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi / 1.0e6 + end_w_time.tv_sec - start_w_time.tv_sec); } -void calculate_norm(Matrix d_mean_shift_vector, double *current_norm){ - int size; - static bool first_iter = true; - // gets max block size supported from the device - - static int requested_block_size = device_properties.maxThreadsPerBlock; - bool block_size_too_big = true; - - dim3 dimBlock; - dim3 dimGrid; - do { - dimBlock.x = requested_block_size; - dimBlock.y = 1; - dimGrid.x = (d_mean_shift_vector.height + dimBlock.x - 1) / dimBlock.x; - dimGrid.y = 1; - - norm<<>>(d_mean_shift_vector, current_norm); - if (cudaGetLastError() != cudaSuccess){ - --requested_block_size; - } else { - block_size_too_big = false; - gpuErrchk( cudaDeviceSynchronize() ); - } - } while(block_size_too_big); - - if (first_iter && params.verbose){ - printf("norm_kernel called with:\n"); - printf("dimBlock.x = %d, dimBlock.y = %d\n", dimBlock.x, dimBlock.y); - printf("dimGrid.x = %d, dimGrid.y = %d\n\n", dimGrid.x, dimGrid.y); - first_iter = false; - } - - size = NUMBER_OF_POINTS * DIMENSIONS * sizeof(double); -} - void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator, Matrix d_shifted_points){ // frees all memory previously allocated in device diff --git a/mean_shift_cuda/meanshift_gpu_utils.h b/mean_shift_cuda/meanshift_gpu_utils.h index a968d58..ced2ce5 100644 --- a/mean_shift_cuda/meanshift_gpu_utils.h +++ b/mean_shift_cuda/meanshift_gpu_utils.h @@ -51,10 +51,6 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi double **original_points, double ***new_shift, double ***mean_shift_vector, double *w_memcpy_time); -//Function calculate_norm is a wrapper for the kernel call of the corresponing kernel -//"norm" that calculate the norm of the mean_shift_vector matrix -void calculate_norm(Matrix d_mean_shift_vector, double *current_norm); - //Function free_device_memory frees device's previously allocated memory void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator, Matrix d_shifted_points);