From 08d4142f7d89bccdbeeff73ff2a164c9816e4b26 Mon Sep 17 00:00:00 2001
From: anapt
Date: Tue, 6 Feb 2018 15:46:46 +0200
Subject: [PATCH] norm kernel

---
 mean_shift_cuda/meanshift_kernels.cu | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 mean_shift_cuda/meanshift_kernels.h  |  3 ++
 2 files changed, 57 insertions(+)

diff --git a/mean_shift_cuda/meanshift_kernels.cu b/mean_shift_cuda/meanshift_kernels.cu
index a20055d..47c6dd7 100644
--- a/mean_shift_cuda/meanshift_kernels.cu
+++ b/mean_shift_cuda/meanshift_kernels.cu
@@ -79,4 +79,58 @@ __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix
     mean_shift_vector.elements[row * new_shift.width + col] =
         new_shift.elements[row * new_shift.width + col] -
         shifted_points.elements[row * new_shift.width + col];
+}
+
+/*
+ * Kernel norm computes the Euclidean (Frobenius) norm of mean_shift_vector,
+ * that is the square root of the sum of the squares of all of its
+ * width * height elements, and stores it in *current_norm.
+ *
+ * Parameters:
+ *   mean_shift_vector  matrix whose elements are read as one flat array of
+ *                      width * height values
+ *   current_norm       location that receives the result; it is dereferenced
+ *                      inside the kernel, so the device must be able to write
+ *                      to it
+ *
+ * The result is a single scalar, so letting every thread store its own
+ * partial value to *current_norm would be a data race in which an arbitrary
+ * thread wins. To keep the output deterministic for any launch configuration
+ * the whole reduction is done by the one thread whose global indexes are
+ * (0, 0); every other thread returns immediately.
+ *
+ * Launch configuration: any grid and block dimensions give the same result,
+ * because only one thread does the computation; a single-thread launch is
+ * sufficient. No shared memory is used.
+ *
+ * NOTE(review): this assumes the caller wants one norm for the whole matrix
+ * and not one norm per row - confirm against the caller. The serial loop
+ * trades speed for correctness; replace it with a parallel reduction if this
+ * kernel becomes a bottleneck.
+ */
+__global__ void norm(Matrix mean_shift_vector, double *current_norm) {
+    // global indexes of the calling thread along the x and y grid dimensions
+    int row = blockIdx.x * blockDim.x + threadIdx.x;
+    int col = blockIdx.y * blockDim.y + threadIdx.y;
+
+    // only the thread with global indexes (0, 0) computes and stores the norm
+    if (row != 0 || col != 0){
+        return;
+    }
+
+    // 64-bit count so that width * height cannot overflow an int
+    long long element_count =
+        (long long)mean_shift_vector.width * mean_shift_vector.height;
+
+    // accumulates the squares of all elements; an empty matrix yields a sum
+    // of 0 and therefore a norm of 0
+    double sum_of_squares = 0;
+    for (long long element_index = 0; element_index < element_count;
+            ++element_index){
+        double element = mean_shift_vector.elements[element_index];
+        sum_of_squares += element * element;
+    }
+
+    // the norm is the square root of the accumulated sum
+    *current_norm = sqrt(sum_of_squares);
 }
\ No newline at end of file
diff --git a/mean_shift_cuda/meanshift_kernels.h b/mean_shift_cuda/meanshift_kernels.h
index 0ff8070..90b4dce 100644
--- a/mean_shift_cuda/meanshift_kernels.h
+++ b/mean_shift_cuda/meanshift_kernels.h
@@ -23,4 +23,7 @@ __global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix);
 __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix, Matrix shifted_points,
         Matrix new_shift, Matrix denominator, Matrix mean_shift_vector);
 
+//Kernel norm computes the Euclidean norm of all elements of mean_shift_vector and stores it in *current_norm
+__global__ void norm(Matrix mean_shift_vector, double *current_norm);
+
 #endif //SERIAL_KERNELS_H
\ No newline at end of file