#include "meanshift_kernels.h" #include __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix original_points, double deviation, Matrix kernel_matrix){ // each thread calculates one element of kernel_matrix int row = blockIdx.x * blockDim.x + threadIdx.x; int col = blockIdx.y * blockDim.y + threadIdx.y; // performs calculations only if thread's indexes are within matrix bounds if (row * kernel_matrix.width + col >= kernel_matrix.width * kernel_matrix.height){ return; } int dimensions = shifted_points.width; // calculate distance double sum = 0, dif; for (int i=0; i= denominator.height){ return; } for (int column = 0; column < kernel_matrix.width; ++column){ cell_value += kernel_matrix.elements[row * kernel_matrix.width + column]; } denominator.elements[row] = cell_value; } __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix, Matrix shifted_points, Matrix new_shift, Matrix denominator, Matrix mean_shift_vector){ // each thread computes one element of new_shift // by accumulating results into cell_value double cell_value = 0; int row = blockIdx.x * blockDim.x + threadIdx.x; int col = blockIdx.y * blockDim.y + threadIdx.y; // performs calculations only if thread's indexes are within matrix bounds if (row * new_shift.width + col >= new_shift.width * new_shift.height){ return; } // calculates new_shift // builds nominator by multiplying kernel_matrix and original_points for (int element_index = 0; element_index < kernel_matrix.width; ++element_index){ cell_value += kernel_matrix.elements[row * kernel_matrix.width + element_index] * original_points.elements[element_index * original_points.width + col]; } // new_shift elements are calculated by dividing with the denominator new_shift.elements[row * new_shift.width + col] = cell_value / denominator.elements[row]; // calculates mean-shift vector mean_shift_vector.elements[row * new_shift.width + col] = new_shift.elements[row * new_shift.width + col] - shifted_points.elements[row * new_shift.width + col]; } __global__ void norm(Matrix mean_shift_vector, double *current_norm) { // each thread computes one element of new_shift // by accumulating results into cell_value double cell_value = 0; int row = blockIdx.x * blockDim.x + threadIdx.x; int col = blockIdx.y * blockDim.y + threadIdx.y; // performs calculations only if thread's indexes are within matrix bounds if (row * mean_shift_vector.width + col >= mean_shift_vector.width * mean_shift_vector.height){ return; } for (int element_index = 0; element_index < mean_shift_vector.width; ++element_index){ cell_value += mean_shift_vector.elements[row * mean_shift_vector.width + element_index] * mean_shift_vector.elements[row * mean_shift_vector.width + element_index]; } *current_norm = sqrt(cell_value); // // new_shift elements are calculated by dividing with the denominator // new_shift.elements[row * new_shift.width + col] = // cell_value / denominator.elements[row]; // // // calculates mean-shift vector // mean_shift_vector.elements[row * new_shift.width + col] = // new_shift.elements[row * new_shift.width + col] - // shifted_points.elements[row * new_shift.width + col]; // int n_tid = 2 * (threadIdx.x + blockIdx.x * blockDim.x); // int i = 1; // int initial_tid = n_tid / 2; // int limit = gridDim.x * blockDim.x; // int block_end = 2 * (blockIdx.x * blockDim.x + blockDim.x) - 1; // // if (n_tid < (2 * limit)){ // // while ( (i < (2 * blockDim.x)) && n_tid < block_end && // (n_tid + i) <= block_end){ // // norms[n_tid] += norms[n_tid + i]; // n_tid = n_tid + i * (initial_tid * 2 - 2 * (blockIdx.x * blockDim.x)); // i *= 2; // __syncthreads(); // } // // // if (!((initial_tid) % blockDim.x)) // norm_per_block[blockIdx.x] = norms[n_tid]; // // } }