norm kernel

8 years ago · 08d4142f7d
2 changed files with 57 additions and 0 deletions
--- a/mean_shift_cuda/meanshift_kernels.cu
+++ b/mean_shift_cuda/meanshift_kernels.cu
@ -80,3 +80,57 @@ __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix
        new_shift.elements[row * new_shift.width + col] -
        shifted_points.elements[row * new_shift.width + col];
 }
 /**
  * Computes the Euclidean (L2) norm of one row of mean_shift_vector, i.e. the
  * square root of the sum of the squared elements of that row, and stores the
  * result through current_norm.
  *
  * Each thread derives a row index from its x block/thread indexes and a column
  * index from its y block/thread indexes. Threads whose indexes fall outside
  * the matrix return without doing any work.
  *
  * @param mean_shift_vector matrix indexed row-major: element (r, c) is read
  *                          from elements[r * width + c]
  * @param current_norm      location that receives the computed norm
  *
  * NOTE(review): every in-bounds thread stores to the same *current_norm.
  * Threads that share a row store the same value, but when the matrix has more
  * than one row the value left behind is that of whichever row was written
  * last. A norm over the whole matrix would need a reduction across rows
  * (e.g. per-block partial sums) -- confirm the intended semantics with the
  * caller.
  */
 __global__ void norm(Matrix mean_shift_vector, double *current_norm) {
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    int col = blockIdx.y * blockDim.y + threadIdx.y;

    // performs calculations only if thread's indexes are within matrix bounds;
    // row and col are checked separately so that a thread with an out-of-range
    // column cannot slip through a combined linear-index comparison
    if (row >= mean_shift_vector.height || col >= mean_shift_vector.width){
        return;
    }

    // offset of the first element of this thread's row (loop invariant)
    int row_offset = row * mean_shift_vector.width;

    // accumulates the sum of squares of the row's elements
    double squared_sum = 0;
    for (int element_index = 0; element_index < mean_shift_vector.width; ++element_index){
        squared_sum += mean_shift_vector.elements[row_offset + element_index]
                       * mean_shift_vector.elements[row_offset + element_index];
    }

    // stores through the pointer; assigning to &current_norm is ill-formed
    *current_norm = sqrt(squared_sum);
 }
--- a/mean_shift_cuda/meanshift_kernels.h
+++ b/mean_shift_cuda/meanshift_kernels.h
@ -23,4 +23,7 @@ __global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix);
 __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix,
    Matrix shifted_points, Matrix new_shift, Matrix denominator, Matrix mean_shift_vector);
 //Kernel norm computes the Euclidean norm of a row of mean_shift_vector, that is
 //the square root of the sum of the squared elements across the row's width.
 //The result is meant to be stored through current_norm.
 //NOTE(review): current_norm is a single pointer shared by all threads -- confirm
 //whether it should receive one value overall or one entry per row.
 __global__ void norm(Matrix mean_shift_vector, double *current_norm);
 #endif //SERIAL_KERNELS_H