diff --git a/mean_shift_cuda/meanshift_kernels.cu b/mean_shift_cuda/meanshift_kernels.cu index 4115289..fdc1867 100644 --- a/mean_shift_cuda/meanshift_kernels.cu +++ b/mean_shift_cuda/meanshift_kernels.cu @@ -27,7 +27,7 @@ __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix ori } int dimensions = shifted_points.width; - + // calculate distance double sum = 0, dif; for (int i=0; i>>(Matrix denominator, Matrix kernel_matrix, int total){ + + int row = blockIdx.x * blockDim.x + threadIdx.x; + int col = blockIdx.y * blockDim.y + threadIdx.y; + + + if (row>=total || col>=total) + return; + + denominator[col]=0; + denominator[row] += kernel_matrix[row*denominator.width + col]; + +} + +// serial + +// calculate denominator +for (int i=0; i>>(d_denominator_matrix, d_kernel_matrix, T); + gpuErrchk( cudaPeekAtLastError() ); + gpuErrchk( cudaDeviceSynchronize() ); + + size = NUMBER_OF_POINTS sizeof(double); + gpuErrchk( cudaMemcpy(&((*denominator)[0]), d_denominator_matrix.elements + ,size, cudaMemcpyDeviceToHost) ); + + gpuErrchk( cudaFree(d_kernel_matrix.elements) ); + gpuErrchk( cudaFree(d_original_points.elements) ); + gpuErrchk( cudaFree(d_new_shift.elements) ); +} \ No newline at end of file diff --git a/mean_shift_cuda/meanshift_utils.h b/mean_shift_cuda/meanshift_utils.h index 9d80c14..c1f3e5e 100644 --- a/mean_shift_cuda/meanshift_utils.h +++ b/mean_shift_cuda/meanshift_utils.h @@ -71,4 +71,8 @@ void print_matrix(double **array, int rows, int cols); void save_matrix(double **matrix , int iteration); +//Function calculate_denominator allocates memory in GPU, sends the data and calls the +//denominator kernel function. +void calculate_denominator(double **kernel_matrix); + #endif //SERIAL_UTILS_H \ No newline at end of file