Browse Source

kernel calculate denominator

master
anapt 7 years ago
parent
commit
37306b6a0f
  1. 27
      mean_shift_cuda/meanshift_kernels.cu
  2. 2
      mean_shift_cuda/meanshift_kernels.h
  3. 67
      mean_shift_cuda/meanshift_utils.cu
  4. 4
      mean_shift_cuda/meanshift_utils.h

27
mean_shift_cuda/meanshift_kernels.cu

@ -27,7 +27,7 @@ __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix ori
}
int dimensions = shifted_points.width;
// calculate distance
double sum = 0, dif;
for (int i=0; i<dimensions; i++){
dif = shifted_points.elements[row * dimensions + i] - original_points.elements[col * dimensions + i];
@ -46,4 +46,29 @@ __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix ori
if (row == col){
kernel_matrix.elements[row * kernel_matrix.width + col] += 1;
}
}
/*
 * denominator_kernel computes the row sums of kernel_matrix:
 *     denominator.elements[row] = sum over j in [0, total) of
 *                                 kernel_matrix.elements[row * kernel_matrix.width + j]
 *
 * Parameters:
 *   denominator    Matrix whose elements array receives one sum per row; it must
 *                  hold at least `total` doubles.
 *   kernel_matrix  row-major matrix whose first `total` rows and columns are summed.
 *   total          number of rows (and of columns per row) to process.
 *
 * Launch layout: the x dimension of the grid indexes rows and must cover
 * [0, total). Any y extent is accepted; only threads whose global y index is 0
 * do work, so a 1D launch is sufficient. Each output element is written by
 * exactly one thread, so no atomics or pre-zeroing of the output are required.
 */
__global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix, int total){
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    int col = blockIdx.y * blockDim.y + threadIdx.y;

    // A single thread per row performs the accumulation. Letting every
    // (row, col) thread update denominator[row] would race on that element.
    if (row >= total || col != 0)
        return;

    double sum = 0;
    for (int j = 0; j < total; j++){
        // the row stride is the width of the matrix being read
        sum += kernel_matrix.elements[row * kernel_matrix.width + j];
    }
    denominator.elements[row] = sum;
}
// Serial reference implementation of the denominator calculation. It is kept
// as a comment because executable statements are not allowed at file scope.
//
// for (int i=0; i<NUMBER_OF_POINTS; i++){
//     double sum = 0;
//     for (int j=0; j<NUMBER_OF_POINTS; j++){
//         sum = sum + kernel_matrix[i][j];
//     }
//     denominator[i] = sum;
// }

2
mean_shift_cuda/meanshift_kernels.h

@ -13,4 +13,6 @@ __global__ void multiply_kernel(Matrix matrix1, Matrix matrix2, Matrix output);
// Kernel that fills kernel_matrix from shifted_points, original_points and
// deviation. Defined in meanshift_kernels.cu.
__global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix original_points
, double deviation, Matrix kernel_matrix);
// Kernel that takes the denominator vector, the kernel matrix and an element
// count T. In the definition (meanshift_kernels.cu) the third parameter is named
// `total` and is used as the upper bound for the row and column indices.
__global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix, int T);
#endif //SERIAL_KERNELS_H

67
mean_shift_cuda/meanshift_utils.cu

@ -183,18 +183,21 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
kernel_matrix = alloc_2d_double(NUMBER_OF_POINTS, NUMBER_OF_POINTS);
denominator = (double *)malloc(NUMBER_OF_POINTS * sizeof(double));
}
// TODO move arrays to device and create global kernel for the iteration
// finds pairwise distance matrix (inside radius)
// [I, D] = rangesearch(x,y,h);
calculate_kernel_matrix((*shifted_points), original_points, deviation, &kernel_matrix);
// calculate denominator
for (int i=0; i<NUMBER_OF_POINTS; i++){
double sum = 0;
for (int j=0; j<NUMBER_OF_POINTS; j++){
sum = sum + kernel_matrix[i][j];
}
denominator[i] = sum;
}
// // calculate denominator
// for (int i=0; i<NUMBER_OF_POINTS; i++){
// double sum = 0;
// for (int j=0; j<NUMBER_OF_POINTS; j++){
// sum = sum + kernel_matrix[i][j];
// }
// denominator[i] = sum;
// }
calculate_denominator(kernel_matrix);
// creates new y vector
double **new_shift = alloc_2d_double(NUMBER_OF_POINTS, DIMENSIONS);
@ -244,7 +247,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
return iteration;
}
// TODO check why there is a difference from the norm calculated in MATLAB
double norm(double **matrix, int rows, int cols){
double sum=0, temp_mul=0;
for (int i=0; i<rows; i++) {
@ -432,3 +435,49 @@ void save_matrix(double **matrix, int iteration){
fprintf(file, "\n");
}
}
/*
 * calculate_denominator computes the per-row sums of kernel_matrix on the GPU
 * and stores them in the host array `denominator`.
 *
 * Steps: allocate device buffers for the denominator vector and the kernel
 * matrix, copy kernel_matrix to the device, launch denominator_kernel, copy
 * the result back to the host and release both device buffers.
 *
 * Parameters:
 *   kernel_matrix  host matrix; NUMBER_OF_POINTS * NUMBER_OF_POINTS doubles are
 *                  copied starting at &kernel_matrix[0][0], so the rows are
 *                  assumed to be stored contiguously (TODO confirm against
 *                  alloc_2d_double).
 *
 * NOTE(review): `denominator` is not a parameter. This assumes it is a
 * `double *` visible at this scope that holds NUMBER_OF_POINTS doubles, as
 * allocated in meanshift(). Confirm its declaration.
 */
void calculate_denominator(double **kernel_matrix){
    static bool first_iter = true;

    // allocates memory for denominator_matrix in GPU
    Matrix d_denominator_matrix;
    d_denominator_matrix.width = NUMBER_OF_POINTS;
    d_denominator_matrix.height = 1;
    // size_t avoids int overflow when computing byte counts
    size_t denominator_size = (size_t)NUMBER_OF_POINTS * sizeof(double);
    gpuErrchk( cudaMalloc(&d_denominator_matrix.elements, denominator_size) );

    // allocates memory for kernel_matrix in GPU and copies the array
    Matrix d_kernel_matrix;
    d_kernel_matrix.width = NUMBER_OF_POINTS;
    d_kernel_matrix.height = NUMBER_OF_POINTS;
    size_t kernel_matrix_size = (size_t)NUMBER_OF_POINTS * NUMBER_OF_POINTS * sizeof(double);
    gpuErrchk( cudaMalloc(&d_kernel_matrix.elements, kernel_matrix_size) );
    gpuErrchk( cudaMemcpy(d_kernel_matrix.elements, &(kernel_matrix[0][0])
        , kernel_matrix_size, cudaMemcpyHostToDevice) );

    // get max sizes supported from the device
    int max_block_size = device_properties.maxThreadsPerBlock;
    // side of the largest square block that fits in max_block_size threads
    int block_side = (int)sqrt((double)max_block_size);
    // 2D launch covering the whole kernel matrix: x indexes rows, y indexes columns
    dim3 dimBlock((d_kernel_matrix.height < block_side) ? d_kernel_matrix.height : block_side
        , (d_kernel_matrix.width < block_side) ? d_kernel_matrix.width : block_side);
    dim3 dimGrid((d_kernel_matrix.height + dimBlock.x - 1) / dimBlock.x
        , (d_kernel_matrix.width + dimBlock.y - 1) / dimBlock.y);

    if (first_iter && params.verbose){
        printf("calculate_denominator called with:\n");
        printf("dimBlock.x = %d, dimBlock.y = %d\n", dimBlock.x, dimBlock.y);
        printf("dimGrid.x = %d, dimGrid.y = %d\n\n", dimGrid.x, dimGrid.y);
        first_iter = false;
    }

    denominator_kernel<<<dimGrid, dimBlock>>>(d_denominator_matrix, d_kernel_matrix
        , NUMBER_OF_POINTS);
    // the launch itself returns nothing: check for launch errors, then for
    // errors raised while the kernel was running
    gpuErrchk( cudaPeekAtLastError() );
    gpuErrchk( cudaDeviceSynchronize() );

    // copies the row sums back to the host
    gpuErrchk( cudaMemcpy(denominator, d_denominator_matrix.elements
        , denominator_size, cudaMemcpyDeviceToHost) );

    // release every device buffer allocated above
    gpuErrchk( cudaFree(d_kernel_matrix.elements) );
    gpuErrchk( cudaFree(d_denominator_matrix.elements) );
}

4
mean_shift_cuda/meanshift_utils.h

@ -71,4 +71,8 @@ void print_matrix(double **array, int rows, int cols);
void save_matrix(double **matrix
, int iteration);
// calculate_denominator allocates device memory for the denominator vector and the
// kernel matrix, copies kernel_matrix to the device, launches denominator_kernel and
// copies the result back to the host.
// kernel_matrix: host matrix; the definition copies NUMBER_OF_POINTS * NUMBER_OF_POINTS
// doubles starting at &kernel_matrix[0][0].
void calculate_denominator(double **kernel_matrix);
#endif //SERIAL_UTILS_H
Loading…
Cancel
Save