Browse Source

2D memory alloc, norm refactored as __device__ function

master
anapt 7 years ago
parent
commit
26869bc13a
  1. 111
      meanshift.c

111
meanshift.c

@ -1,8 +1,11 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <math.h>
#include <float.h>
#include "serial_declarations.h"
#define N 512
int NUMBER_OF_POINTS = 600;
int DIMENSIONS = 2;
@ -15,6 +18,19 @@ double seq_time;
int meanshift(double **original_points, double ***shifted_points, int h
, parameters *opt, int iteration);
__device__ double norm(double **matrix, int rows, int cols){
double sum=0, temp_mul=0;
for (int i=0; i<rows; i++) {
for (int j=0; j<cols; j++) {
temp_mul = matrix[i][j] * matrix[i][j];
sum = sum + temp_mul;
}
}
double norm = sqrt(sum);
return norm;
}
int main(int argc, char **argv){
int h = 1;
@ -74,6 +90,8 @@ int main(int argc, char **argv){
//save_matrix(shifted_points, iterations);
}
int meanshift(double **original_points, double ***shifted_points, int h
, parameters *opt, int iteration){
@ -102,33 +120,40 @@ int meanshift(double **original_points, double ***shifted_points, int h
double * d_kernel_matrix;
cudaMalloc(&d_kernel_matrix, NUMBER_OF_POINTS * NUMBER_OF_POINTS * sizeof(double));
size_t pitch_kernel_matrix;
cudaMallocPitch(&d_kernel_matrix, &pitch_kernel_matrix,
NUMBER_OF_POINTS * sizeof(double), NUMBER_OF_POINTS);
double * d_denominator;
cudaMalloc(&d_denominator, NUMBER_OF_POINTS * sizeof(double));
double * d_new_shift;
cudaMalloc(&d_new_shift, NUMBER_OF_POINTS * DIMENSIONS * sizeof(double));
size_t pitch_new_shift;
cudaMallocPitch(&d_new_shift, &pitch_new_shift,
NUMBER_OF_POINTS * sizeof(double), DIMENSIONS);
// (*shifted_points) = alloc_2d_double(NUMBER_OF_POINTS, DIMENSIONS);
// duplicate(original_points, NUMBER_OF_POINTS, DIMENSIONS, shifted_points);
double * d_shifted_points;
cudaMalloc(&d_shifted_points, NUMBER_OF_POINTS * DIMENSIONS * sizeof(double));
size_t pitch_shifted_points;
cudaMallocPitch(&d_shifted_points, &pitch_shifted_points,
NUMBER_OF_POINTS * sizeof(double), DIMENSIONS);
// double **mean_shift_vector;
// mean_shift_vector = alloc_2d_double(NUMBER_OF_POINTS, DIMENSIONS);
double * d_mean_shift_vector;
cudaMalloc(&d_mean_shift_vector, NUMBER_OF_POINTS * DIMENSIONS * sizeof(double));
size_t pitch_mean_shift_vector;
cudaMallocPitch(&d_mean_shift_vector, &pitch_mean_shift_vector,
NUMBER_OF_POINTS * sizeof(double), DIMENSIONS);
//Copy vectors from host memory to device memory
cudaMemcpy(d_shifted_points, *shifted_points, NUMBER_OF_POINTS * DIMENSIONS * sizeof(double),
cudaMemcpyHostToDevice);
// y[i][j] == d_y[COLUMNS*i + j]
cudaMemcpy(d_mean_shift_vector, mean_shift_vector, NUMBER_OF_POINTS * DIMENSIONS * sizeof(double),
cudaMemcpyHostToDevice);
cudaMemcpy2D(d_shifted_points, NUMBER_OF_POINTS * sizeof(double), *shifted_points,
pitch_shifted_points, NUMBER_OF_POINTS * sizeof(double),
DIMENSIONS, cudaMemcpyHostToDevice);
cudaMemcpy2D(d_mean_shift_vector, NUMBER_OF_POINTS * sizeof(double), *mean_shift_vector,
pitch_mean_shift_vector, NUMBER_OF_POINTS * sizeof(double),
DIMENSIONS, cudaMemcpyHostToDevice);
// TODO REFACTOR AS A KERNEL
// find pairwise distance matrix (inside radius)
// [I, D] = rangesearch(x,y,h);
for (int i=0; i<NUMBER_OF_POINTS; i++){
double sum = 0;
for (int j=0; j<NUMBER_OF_POINTS; j++){
@ -192,3 +217,57 @@ int meanshift(double **original_points, double ***shifted_points, int h
return iteration;
}
/**
__global__ int iteration(double * kernel_matrix, double * denominator,
double * new_shift, double *shifted_points, double mean_shift_vector,
int NUMBER_OF_POINTS, int DIMENSIONS, int h){
int i = threadIdx.x + blockIdx.x * blockDim.x;
for (i = 0; i < NUMBER_OF_POINTS; i++) {
double sum = 0;
for (int j = 0; j < NUMBER_OF_POINTS; j++) {
double dist_sum = 0;
for (int p = 0; p < DIMENSIONS; p++) {
double dif = ((*shifted_points)[i])[p] - (original_points[j])[p];
dist_sum += dif * dif;
}
double dist = sqrt(dist_sum);
if (dist < h * h) {
kernel_matrix[i][j] = dist * dist;
// compute kernel matrix
double pow = ((-1) * (kernel_matrix[i][j])) / (2 * (h * h));
kernel_matrix[i][j] = exp(pow);
} else {
kernel_matrix[i][j] = 0;
}
if (i == j) {
kernel_matrix[i][j] += 1;
}
sum = sum + kernel_matrix[i][j];
}
denominator[i] = sum;
// build nominator
for (int j = 0; j < DIMENSIONS; j++) {
new_shift[i][j] = 0;
for (int k = 0; k < NUMBER_OF_POINTS; k++) {
new_shift[i][j] += kernel_matrix[i][k] * original_points[k][j];
}
// divide element-wise
new_shift[i][j] = new_shift[i][j] / denominator[i];
// calculate mean-shift vector at the same time
mean_shift_vector[i][j] = new_shift[i][j] - (*shifted_points)[i][j];
}
}
// frees previously shifted points, they're now garbage
free((*shifted_points)[0]);
// updates shifted points pointer to the new array address
shifted_points = &new_shift;
}
*/
Loading…
Cancel
Save