#include "meanshift_kernels.h"
// NOTE(review): the header name of the #include below is missing in this copy
// of the file (angle-bracket text appears to have been stripped) -- restore it
// from the original source.
#include

/*
 * Computes output = matrix1 * matrix2, one output cell per thread.
 *
 * Thread mapping (as coded below): the x grid/block dimension selects the
 * output row and the y dimension selects the output column. All three
 * matrices are indexed as elements[row * width + col].
 *
 * Parameters:
 *   matrix1 - left operand; its width is the length of the dot product.
 *   matrix2 - right operand; assumed to have matrix1.width rows -- TODO confirm
 *             against the callers, this kernel does not check it.
 *   output  - result; its height/width bound which threads do any work.
 *
 * Threads that fall outside output return without touching memory, so the
 * launch grid may be larger than the matrix in either dimension.
 */
__global__ void multiply_kernel(Matrix matrix1, Matrix matrix2, Matrix output){
    const int row = blockIdx.x * blockDim.x + threadIdx.x;
    const int col = blockIdx.y * blockDim.y + threadIdx.y;

    // Each index must be checked against its own dimension. A combined test
    // such as (row + col < height * width) lets through threads whose row or
    // column lies outside the matrix; those would read past the inputs and
    // overwrite cells that belong to other threads.
    if (row >= output.height || col >= output.width){
        return;
    }

    // Dot product of row `row` of matrix1 with column `col` of matrix2.
    double cell_value = 0;
    for (int k = 0; k < matrix1.width; ++k){
        cell_value += matrix1.elements[row * matrix1.width + k]
            * matrix2.elements[k * matrix2.width + col];
    }
    output.elements[row * output.width + col] = cell_value;
}

__global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix original_points
    , double deviation, Matrix kernel_matrix){
    // Each thread calculates one element of kernel_matrix
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    int col = blockIdx.y * blockDim.y + threadIdx.y;
    // NOTE(review): this guard compares a flattened index with '>', so an index
    // equal to width * height is not rejected, and col >= kernel_matrix.width is
    // not rejected on its own either. Left unchanged because the remainder of
    // this kernel is not visible in this copy of the file.
    if (row * kernel_matrix.width + col > kernel_matrix.width * kernel_matrix.height){
        return;
    }
    int dimensions = shifted_points.width;
    // calculate distance
    double sum = 0, dif;
    // NOTE(review): the file text is damaged from this point: the rest of the
    // loop below, the remainder of this kernel, and the head of the following
    // kernel are missing. The surviving text is kept verbatim; restore it from
    // the original source before building.
    for (int i=0; i>>(Matrix denominator, Matrix kernel_matrix, int total){
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    int col = blockIdx.y * blockDim.y + threadIdx.y;
    if (row>=total || col>=total) return;
    denominator[col]=0;
    denominator[row] += kernel_matrix[row*denominator.width + col];
}
// serial // calculate denominator for (int i=0; i