diff --git a/mean_shift_cuda/meanshift_kernels.cu b/mean_shift_cuda/meanshift_kernels.cu index fdc1867..832326d 100644 --- a/mean_shift_cuda/meanshift_kernels.cu +++ b/mean_shift_cuda/meanshift_kernels.cu @@ -54,8 +54,9 @@ __global__ void denominator_kernel<<>>(Matrix denominator, Ma int col = blockIdx.y * blockDim.y + threadIdx.y; - if (row>=total || col>=total) + if (row * denominator.width + col > denominator.width * denominator.height){ return; + } denominator[col]=0; denominator[row] += kernel_matrix[row*denominator.width + col]; diff --git a/mean_shift_cuda/meanshift_kernels.h b/mean_shift_cuda/meanshift_kernels.h index 05186fb..0f9398d 100644 --- a/mean_shift_cuda/meanshift_kernels.h +++ b/mean_shift_cuda/meanshift_kernels.h @@ -13,6 +13,6 @@ __global__ void multiply_kernel(Matrix matrix1, Matrix matrix2, Matrix output); __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix original_points , double deviation, Matrix kernel_matrix); -__global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix, int T); +__global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix); #endif //SERIAL_KERNELS_H \ No newline at end of file diff --git a/mean_shift_cuda/meanshift_utils.cu b/mean_shift_cuda/meanshift_utils.cu index e33382b..efafd56 100644 --- a/mean_shift_cuda/meanshift_utils.cu +++ b/mean_shift_cuda/meanshift_utils.cu @@ -197,7 +197,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation // } // denominator[i] = sum; // } - calculate_denominator(kernel_matrix); + denominator = calculate_denominator(kernel_matrix); // creates new y vector double **new_shift = alloc_2d_double(NUMBER_OF_POINTS, DIMENSIONS); @@ -469,7 +469,7 @@ double * calculate_denominator(double **kernel_matrix){ first_iter = false; } - denominator_kernel<<>>(d_denominator_matrix, d_kernel_matrix, T); + denominator_kernel<<>>(d_denominator_matrix, d_kernel_matrix); gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaDeviceSynchronize() ); diff --git a/mean_shift_cuda/meanshift_utils.h b/mean_shift_cuda/meanshift_utils.h index c1f3e5e..fbf4394 100644 --- a/mean_shift_cuda/meanshift_utils.h +++ b/mean_shift_cuda/meanshift_utils.h @@ -73,6 +73,6 @@ void save_matrix(double **matrix //Function calculate_denominator allocates memory in GPU, sends the data and calls the //denominator kernel function. -void calculate_denominator(double **kernel_matrix); +double * calculate_denominator(double **kernel_matrix); #endif //SERIAL_UTILS_H \ No newline at end of file