|
@ -1,8 +1,11 @@ |
|
|
#include <stdio.h> |
|
|
#include <stdio.h> |
|
|
#include <stdlib.h> |
|
|
#include <stdlib.h> |
|
|
#include <sys/time.h> |
|
|
#include <sys/time.h> |
|
|
|
|
|
#include <math.h> |
|
|
|
|
|
#include <float.h> |
|
|
|
|
|
|
|
|
#include "serial_declarations.h" |
|
|
#include "serial_declarations.h" |
|
|
|
|
|
#define N 512 |
|
|
|
|
|
|
|
|
int NUMBER_OF_POINTS = 600; |
|
|
int NUMBER_OF_POINTS = 600; |
|
|
int DIMENSIONS = 2; |
|
|
int DIMENSIONS = 2; |
|
@ -15,6 +18,19 @@ double seq_time; |
|
|
int meanshift(double **original_points, double ***shifted_points, int h |
|
|
int meanshift(double **original_points, double ***shifted_points, int h |
|
|
, parameters *opt, int iteration); |
|
|
, parameters *opt, int iteration); |
|
|
|
|
|
|
|
|
|
|
|
__device__ double norm(double **matrix, int rows, int cols){ |
|
|
|
|
|
|
|
|
|
|
|
double sum=0, temp_mul=0; |
|
|
|
|
|
for (int i=0; i<rows; i++) { |
|
|
|
|
|
for (int j=0; j<cols; j++) { |
|
|
|
|
|
temp_mul = matrix[i][j] * matrix[i][j]; |
|
|
|
|
|
sum = sum + temp_mul; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
double norm = sqrt(sum); |
|
|
|
|
|
return norm; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
int main(int argc, char **argv){ |
|
|
int main(int argc, char **argv){ |
|
|
int h = 1; |
|
|
int h = 1; |
|
|
|
|
|
|
|
@ -74,6 +90,8 @@ int main(int argc, char **argv){ |
|
|
//save_matrix(shifted_points, iterations);
|
|
|
//save_matrix(shifted_points, iterations);
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int meanshift(double **original_points, double ***shifted_points, int h |
|
|
int meanshift(double **original_points, double ***shifted_points, int h |
|
|
, parameters *opt, int iteration){ |
|
|
, parameters *opt, int iteration){ |
|
|
|
|
|
|
|
@ -102,33 +120,40 @@ int meanshift(double **original_points, double ***shifted_points, int h |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
double * d_kernel_matrix; |
|
|
double * d_kernel_matrix; |
|
|
cudaMalloc(&d_kernel_matrix, NUMBER_OF_POINTS * NUMBER_OF_POINTS * sizeof(double)); |
|
|
size_t pitch_kernel_matrix; |
|
|
|
|
|
cudaMallocPitch(&d_kernel_matrix, &pitch_kernel_matrix, |
|
|
|
|
|
NUMBER_OF_POINTS * sizeof(double), NUMBER_OF_POINTS); |
|
|
|
|
|
|
|
|
double * d_denominator; |
|
|
double * d_denominator; |
|
|
cudaMalloc(&d_denominator, NUMBER_OF_POINTS * sizeof(double)); |
|
|
cudaMalloc(&d_denominator, NUMBER_OF_POINTS * sizeof(double)); |
|
|
|
|
|
|
|
|
double * d_new_shift; |
|
|
double * d_new_shift; |
|
|
cudaMalloc(&d_new_shift, NUMBER_OF_POINTS * DIMENSIONS * sizeof(double)); |
|
|
size_t pitch_new_shift; |
|
|
|
|
|
cudaMallocPitch(&d_new_shift, &pitch_new_shift, |
|
|
|
|
|
NUMBER_OF_POINTS * sizeof(double), DIMENSIONS); |
|
|
|
|
|
|
|
|
// (*shifted_points) = alloc_2d_double(NUMBER_OF_POINTS, DIMENSIONS);
|
|
|
|
|
|
// duplicate(original_points, NUMBER_OF_POINTS, DIMENSIONS, shifted_points);
|
|
|
|
|
|
double * d_shifted_points; |
|
|
double * d_shifted_points; |
|
|
cudaMalloc(&d_shifted_points, NUMBER_OF_POINTS * DIMENSIONS * sizeof(double)); |
|
|
size_t pitch_shifted_points; |
|
|
|
|
|
cudaMallocPitch(&d_shifted_points, &pitch_shifted_points, |
|
|
|
|
|
NUMBER_OF_POINTS * sizeof(double), DIMENSIONS); |
|
|
|
|
|
|
|
|
// double **mean_shift_vector;
|
|
|
|
|
|
// mean_shift_vector = alloc_2d_double(NUMBER_OF_POINTS, DIMENSIONS);
|
|
|
|
|
|
double * d_mean_shift_vector; |
|
|
double * d_mean_shift_vector; |
|
|
cudaMalloc(&d_mean_shift_vector, NUMBER_OF_POINTS * DIMENSIONS * sizeof(double)); |
|
|
size_t pitch_mean_shift_vector; |
|
|
|
|
|
cudaMallocPitch(&d_mean_shift_vector, &pitch_mean_shift_vector, |
|
|
|
|
|
NUMBER_OF_POINTS * sizeof(double), DIMENSIONS); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//Copy vectors from host memory to device memory
|
|
|
cudaMemcpy2D(d_shifted_points, NUMBER_OF_POINTS * sizeof(double), *shifted_points, |
|
|
cudaMemcpy(d_shifted_points, *shifted_points, NUMBER_OF_POINTS * DIMENSIONS * sizeof(double), |
|
|
pitch_shifted_points, NUMBER_OF_POINTS * sizeof(double), |
|
|
cudaMemcpyHostToDevice); |
|
|
DIMENSIONS, cudaMemcpyHostToDevice); |
|
|
// y[i][j] == d_y[COLUMNS*i + j]
|
|
|
|
|
|
cudaMemcpy(d_mean_shift_vector, mean_shift_vector, NUMBER_OF_POINTS * DIMENSIONS * sizeof(double), |
|
|
cudaMemcpy2D(d_mean_shift_vector, NUMBER_OF_POINTS * sizeof(double), *mean_shift_vector, |
|
|
cudaMemcpyHostToDevice); |
|
|
pitch_mean_shift_vector, NUMBER_OF_POINTS * sizeof(double), |
|
|
|
|
|
DIMENSIONS, cudaMemcpyHostToDevice); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// TODO REFACTOR AS A KERNEL
|
|
|
// TODO REFACTOR AS A KERNEL
|
|
|
// find pairwise distance matrix (inside radius)
|
|
|
|
|
|
// [I, D] = rangesearch(x,y,h);
|
|
|
|
|
|
for (int i=0; i<NUMBER_OF_POINTS; i++){ |
|
|
for (int i=0; i<NUMBER_OF_POINTS; i++){ |
|
|
double sum = 0; |
|
|
double sum = 0; |
|
|
for (int j=0; j<NUMBER_OF_POINTS; j++){ |
|
|
for (int j=0; j<NUMBER_OF_POINTS; j++){ |
|
@ -192,3 +217,57 @@ int meanshift(double **original_points, double ***shifted_points, int h |
|
|
|
|
|
|
|
|
return iteration; |
|
|
return iteration; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
|
|
|
|
|
|
|
__global__ int iteration(double * kernel_matrix, double * denominator, |
|
|
|
|
|
double * new_shift, double *shifted_points, double mean_shift_vector, |
|
|
|
|
|
int NUMBER_OF_POINTS, int DIMENSIONS, int h){ |
|
|
|
|
|
|
|
|
|
|
|
int i = threadIdx.x + blockIdx.x * blockDim.x; |
|
|
|
|
|
for (i = 0; i < NUMBER_OF_POINTS; i++) { |
|
|
|
|
|
double sum = 0; |
|
|
|
|
|
for (int j = 0; j < NUMBER_OF_POINTS; j++) { |
|
|
|
|
|
double dist_sum = 0; |
|
|
|
|
|
for (int p = 0; p < DIMENSIONS; p++) { |
|
|
|
|
|
double dif = ((*shifted_points)[i])[p] - (original_points[j])[p]; |
|
|
|
|
|
dist_sum += dif * dif; |
|
|
|
|
|
} |
|
|
|
|
|
double dist = sqrt(dist_sum); |
|
|
|
|
|
|
|
|
|
|
|
if (dist < h * h) { |
|
|
|
|
|
kernel_matrix[i][j] = dist * dist; |
|
|
|
|
|
// compute kernel matrix
|
|
|
|
|
|
double pow = ((-1) * (kernel_matrix[i][j])) / (2 * (h * h)); |
|
|
|
|
|
kernel_matrix[i][j] = exp(pow); |
|
|
|
|
|
} else { |
|
|
|
|
|
kernel_matrix[i][j] = 0; |
|
|
|
|
|
} |
|
|
|
|
|
if (i == j) { |
|
|
|
|
|
kernel_matrix[i][j] += 1; |
|
|
|
|
|
} |
|
|
|
|
|
sum = sum + kernel_matrix[i][j]; |
|
|
|
|
|
} |
|
|
|
|
|
denominator[i] = sum; |
|
|
|
|
|
|
|
|
|
|
|
// build nominator
|
|
|
|
|
|
for (int j = 0; j < DIMENSIONS; j++) { |
|
|
|
|
|
new_shift[i][j] = 0; |
|
|
|
|
|
for (int k = 0; k < NUMBER_OF_POINTS; k++) { |
|
|
|
|
|
new_shift[i][j] += kernel_matrix[i][k] * original_points[k][j]; |
|
|
|
|
|
} |
|
|
|
|
|
// divide element-wise
|
|
|
|
|
|
new_shift[i][j] = new_shift[i][j] / denominator[i]; |
|
|
|
|
|
// calculate mean-shift vector at the same time
|
|
|
|
|
|
mean_shift_vector[i][j] = new_shift[i][j] - (*shifted_points)[i][j]; |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// frees previously shifted points, they're now garbage
|
|
|
|
|
|
free((*shifted_points)[0]); |
|
|
|
|
|
// updates shifted points pointer to the new array address
|
|
|
|
|
|
shifted_points = &new_shift; |
|
|
|
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
*/ |