norm kernel

8 years ago · 08d4142f7d
2 changed files with 57 additions and 0 deletions
--- a/mean_shift_cuda/meanshift_kernels.cu
+++ b/mean_shift_cuda/meanshift_kernels.cu
@ -80,3 +80,57 @@ __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix
        new_shift.elements[row * new_shift.width + col] -
        shifted_points.elements[row * new_shift.width + col];
 }
+
+// Computes the Euclidean (L2) norm of one row of mean_shift_vector, i.e. the
+// square root of the sum of the squared elements of that row, and stores it
+// in *current_norm.
+//
+// Thread mapping: row is taken from the x dimension of the launch and col from
+// the y dimension. col only takes part in the bounds check; the value computed
+// for a row does not depend on it.
+//
+// Parameters:
+//   mean_shift_vector  matrix read as elements[row * width + column]
+//   current_norm       destination of the result (a single double is written)
+//
+// NOTE(review): every in-bounds thread stores to the same address. When the
+// matrix has more than one row, the value left in *current_norm is the norm of
+// whichever row's thread happens to write last. If the norm over the whole
+// matrix is wanted, a reduction across rows is still required (see the draft
+// kept at the end of this kernel) -- confirm the intent against the caller.
+__global__ void norm(Matrix mean_shift_vector, double *current_norm) {
+    int row = blockIdx.x * blockDim.x + threadIdx.x;
+    int col = blockIdx.y * blockDim.y + threadIdx.y;
+
+    // threads that fall outside the matrix do nothing; row and col are checked
+    // separately so an out-of-range col is rejected and no flat-index product
+    // has to be formed
+    if (row >= mean_shift_vector.height || col >= mean_shift_vector.width){
+        return;
+    }
+
+    // accumulates the sum of squares of the row's elements
+    double cell_value = 0;
+    int row_offset = row * mean_shift_vector.width;
+    for (int element_index = 0; element_index < mean_shift_vector.width; ++element_index){
+        cell_value += mean_shift_vector.elements[row_offset + element_index]
+                      * mean_shift_vector.elements[row_offset + element_index];
+    }
+
+    // stores through the pointer (the previous "&current_norm = ..." tried to
+    // assign to the address of the parameter, which is not assignable)
+    *current_norm = sqrt(cell_value);
+
+
+// Disabled code copied from shift_points_kernel, kept as found:
+//    // new_shift elements are calculated by dividing with the denominator
+//    new_shift.elements[row * new_shift.width + col] =
+//            cell_value / denominator.elements[row];
+//
+//    // calculates mean-shift vector
+//    mean_shift_vector.elements[row * new_shift.width + col] =
+//            new_shift.elements[row * new_shift.width + col] -
+//            shifted_points.elements[row * new_shift.width + col];
+
+// Disabled draft of a per-block reduction, kept as found:
+//    int n_tid = 2 * (threadIdx.x + blockIdx.x * blockDim.x);
+//    int i = 1;
+//    int initial_tid = n_tid / 2;
+//    int limit = gridDim.x * blockDim.x;
+
+//    int block_end = 2 * (blockIdx.x * blockDim.x + blockDim.x) - 1;
+//
+//    if (n_tid < (2 * limit)){
+//
+//        while ( (i < (2 * blockDim.x)) && n_tid < block_end &&
+//                (n_tid + i) <= block_end){
+//
+//            norms[n_tid] += norms[n_tid + i];
+//            n_tid = n_tid + i * (initial_tid * 2 - 2 * (blockIdx.x * blockDim.x));
+//            i *= 2;
+//            __syncthreads();
+//        }
+//
+//
+//        if (!((initial_tid) % blockDim.x))
+//            norm_per_block[blockIdx.x] = norms[n_tid];
+//
+//    }
+}
--- a/mean_shift_cuda/meanshift_kernels.h
+++ b/mean_shift_cuda/meanshift_kernels.h
@ -23,4 +23,7 @@ __global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix);
 __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix,
    Matrix shifted_points, Matrix new_shift, Matrix denominator, Matrix mean_shift_vector);

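+// Kernel norm computes the Euclidean norm (square root of the sum of squares)
+// of a row of mean_shift_vector; the result is returned through current_norm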
+__global__ void norm(Matrix mean_shift_vector, double *current_norm);
+
 #endif //SERIAL_KERNELS_H