anapt
7 years ago
2 changed files with 0 additions and 390 deletions
@ -1,21 +0,0 @@ |
|||
|
|||
// Host code |
|||
// Host side of the pitched-allocation example: allocate a width x height
// array of floats with cudaMallocPitch and hand it to MyKernel.
int width = 64, height = 64; |
|||
float* devPtr; |
|||
// Filled in by cudaMallocPitch; the kernel uses it as the byte distance
// between consecutive rows.
size_t pitch; |
|||
// Requests rows of width * sizeof(float) bytes, `height` of them.
// NOTE(review): the cudaError_t returned by this call is discarded.
cudaMallocPitch(&devPtr, &pitch, width * sizeof(float), height); |
|||
// Launches MyKernel with 100 blocks of 512 threads each.
MyKernel<<<100, 512>>>(devPtr, pitch, width, height); |
|||
|
|||
|
|||
// Device code |
|||
// Device side of the pitched-allocation example: reads every element of a
// pitched 2D float array.
//
//   devPtr : base address of the pitched allocation
//   pitch  : distance in bytes between the starts of consecutive rows
//   width  : number of float elements per row
//   height : number of rows
//
// The loops do not depend on threadIdx/blockIdx, so every launched thread
// walks the whole array; the value read is not stored anywhere.
__global__ void MyKernel(float* devPtr, size_t pitch, int width, int height) {
    for (int r = 0; r < height; ++r) {
        // Rows are `pitch` bytes apart, so the row offset is applied to a
        // char* before converting back to float*.
        float* row = (float*)((char*)devPtr + r * pitch);
        for (int c = 0; c < width; ++c) {
            float element = row[c];
            (void)element;  // sample code: the element is only read
        }
    }
}
|||
|
|||
Read more at: http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#ixzz54kTh80mZ |
|||
Follow us: @GPUComputing on Twitter | NVIDIA on Facebook |
@ -1,369 +0,0 @@ |
|||
#include <float.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
|||
|
|||
// Path of the binary file main() reads the input points from (as doubles).
#define X "data/X.bin" |
|||
// Path of the binary file main() reads the labels from (as raw bytes).
#define L "data/L.bin" |
|||
// Number of doubles per point, i.e. the width of the data matrix.
#define COLUMNS 2 |
|||
// Number of points, i.e. the height of the data matrix.
#define ROWS 600 |
|||
|
|||
// Options handed to meanshift().
struct parameters { |
|||
// Convergence threshold: meanshift() keeps iterating while the norm of the
// mean-shift matrix is greater than this value.
double epsilon; |
|||
// Set to false by main(); not read anywhere in the visible code.
bool verbose; |
|||
// Set to false by main(); not read anywhere in the visible code.
bool display; |
|||
}; |
|||
|
|||
// Forward declarations of the host helpers defined further down in this file.

// Allocates a rows x cols matrix of doubles backed by one contiguous buffer.
double **alloc_2d_double(int rows, int cols); |
|||
// Copies the rows x cols values of a into b and returns b.
double **duplicate(double **a, double **b, int rows, int cols); |
|||
// Runs the mean-shift iteration on the ROWS x COLUMNS matrix x.
void meanshift(double **x, int h, struct parameters *opt); |
|||
// Square root of the sum of the squares of all rows x cols elements of m.
double norm(double ** m, int rows, int cols); |
|||
// output = matrix1 (ROWS x ROWS) times matrix2 (ROWS x COLUMNS).
void multiply(double ** matrix1, double ** matrix2, double ** output); |
|||
// Euclidean distance between two vectors of COLUMNS doubles.
double calculateDistance(double *, double *); |
|||
// Prints a matrix to stdout.
void print_matrix(double ** array, int rows, int cols); |
|||
|
|||
|
|||
// Timestamps taken by main() immediately before and after the meanshift() call.
struct timeval startwtime, endwtime; |
|||
// Elapsed wall-clock time in seconds, computed by main() from the two timestamps.
double seq_time; |
|||
|
|||
/*
 * Entry point.
 *
 * Reads ROWS points of COLUMNS doubles each from the file X, loads the raw
 * label bytes from the file L, runs meanshift() on the points and prints the
 * elapsed wall-clock time.
 *
 * argc/argv are currently unused.
 *
 * Returns 0 after a complete run and 1 when an input file cannot be opened
 * or fully read, or when memory cannot be allocated.
 */
int main(int argc, char **argv){
    // Everything that needs releasing is declared up front so that every
    // failure path can jump to the single cleanup section below.
    int status = 1;               // becomes 0 only after a complete run
    FILE *f = NULL;
    double **vectors = NULL;
    char *labels = NULL;
    long int pos = 0;
    size_t label_elements = 0;
    struct parameters params;
    int h = 1;

    (void)argc;
    (void)argv;

    // ---- input points ----
    f = fopen(X, "rb");
    if (f == NULL){
        perror(X);
        goto cleanup;
    }

    vectors = alloc_2d_double(ROWS, COLUMNS);
    if (vectors == NULL){
        fprintf(stderr, "could not allocate the %d x %d input matrix\n", ROWS, COLUMNS);
        goto cleanup;
    }
    for (int i=0; i<ROWS; i++){
        // Each row must deliver exactly COLUMNS doubles.
        if (fread(vectors[i], sizeof(double), COLUMNS, f) != (size_t)COLUMNS){
            fprintf(stderr, "%s: could not read row %d\n", X, i);
            goto cleanup;
        }
    }
    // Close the points file before the handle is reused for the labels.
    fclose(f);
    f = NULL;

    // ---- labels ----
    // NOTE : Labels were classified as <class 'numpy.uint8'>
    // variables of type uint8 are stored as 1-byte (8-bit) unsigned integers
    f = fopen(L, "rb");
    if (f == NULL){
        perror(L);
        goto cleanup;
    }
    // The file size in bytes is the number of labels.
    if (fseek(f, 0L, SEEK_END) != 0 || (pos = ftell(f)) < 0){
        perror(L);
        goto cleanup;
    }
    rewind(f);
    label_elements = (size_t)pos / sizeof(char);
    if (label_elements > 0){
        labels = (char*)malloc(label_elements * sizeof(char));
        if (labels == NULL){
            fprintf(stderr, "could not allocate %zu label bytes\n", label_elements);
            goto cleanup;
        }
        if (fread(labels, sizeof(char), label_elements, f) != label_elements){
            fprintf(stderr, "%s: could not read the labels\n", L);
            goto cleanup;
        }
    }
    fclose(f);
    f = NULL;

    // MEAN SHIFT OPTIONS
    params.epsilon = 0.0001;
    params.verbose = false;
    params.display = false;

    // tic
    gettimeofday (&startwtime, NULL);

    meanshift(vectors, h, &params);

    // toc
    gettimeofday (&endwtime, NULL);
    seq_time = (double)((endwtime.tv_usec - startwtime.tv_usec)/1.0e6 + endwtime.tv_sec - startwtime.tv_sec);
    printf("%s wall clock time = %f\n","Mean Shift", seq_time);

    //TODO write output points to file -> plot later

    status = 0;

cleanup:
    if (f != NULL){
        fclose(f);
    }
    free(labels);
    if (vectors != NULL){
        // alloc_2d_double() stores the contiguous data block at vectors[0].
        free(vectors[0]);
        free(vectors);
    }
    return status;
}
|||
|
|||
// Releases a matrix obtained from alloc_2d_double(): the contiguous data
// block is stored at array[0], the row-pointer table is `array` itself.
static void free_2d_double(double **array){
    if (array != NULL){
        free(array[0]);
        free(array);
    }
}

// Reports a failed CUDA runtime call on stderr without aborting; the host
// computation in meanshift() does not depend on the device buffers.
static void warn_on_cuda_error(cudaError_t status, const char *what){
    if (status != cudaSuccess){
        fprintf(stderr, "CUDA warning: %s failed: %s\n", what, cudaGetErrorString(status));
    }
}

/*
 * Mean shift over the ROWS x COLUMNS point matrix x.
 *
 *   x   : input points (not modified)
 *   h   : radius; pairs farther apart than h contribute nothing, and h also
 *         scales the exponent of the Gaussian weight
 *   opt : options; only opt->epsilon is read (convergence threshold)
 *
 * A working copy y of x is updated repeatedly until the norm of the
 * mean-shift matrix m (new y minus old y) is no longer greater than
 * opt->epsilon. One progress line is printed per iteration. The shifted
 * points are not returned; all buffers are released before returning.
 *
 * Device buffers are allocated and filled as in the work-in-progress GPU
 * port, but no kernel is launched from here; CUDA failures are only reported.
 */
void meanshift(double **x, int h, struct parameters *opt){
    // All iteration buffers are allocated once, outside the loop.
    double **y     = alloc_2d_double(ROWS, COLUMNS);   // current points
    double **m     = alloc_2d_double(ROWS, COLUMNS);   // mean shift vectors
    double **W     = alloc_2d_double(ROWS, ROWS);      // pairwise weights
    double **y_new = alloc_2d_double(ROWS, COLUMNS);   // next points
    double *l      = (double *)malloc(ROWS * sizeof(double)); // row sums of W

    double *d_W = NULL;
    double *d_I = NULL;
    double *d_y_new = NULL;
    double *d_y = NULL;
    double *d_m = NULL;

    if (y == NULL || m == NULL || W == NULL || y_new == NULL || l == NULL){
        fprintf(stderr, "meanshift: out of memory\n");
        free_2d_double(y);
        free_2d_double(m);
        free_2d_double(W);
        free_2d_double(y_new);
        free(l);
        return;
    }

    y = duplicate(x, y, ROWS, COLUMNS);

    // initialize elements of m to the largest double so the first norm
    // check always enters the loop
    for (int i=0;i<ROWS;i++){
        for (int j=0;j<COLUMNS;j++){
            m[i][j] = DBL_MAX;
        }
    }

    warn_on_cuda_error(cudaMalloc(&d_W, ROWS * ROWS * sizeof(double)), "cudaMalloc(d_W)");
    warn_on_cuda_error(cudaMalloc(&d_I, ROWS * sizeof(double)), "cudaMalloc(d_I)");
    warn_on_cuda_error(cudaMalloc(&d_y_new, ROWS * COLUMNS * sizeof(double)), "cudaMalloc(d_y_new)");
    warn_on_cuda_error(cudaMalloc(&d_y, ROWS * COLUMNS * sizeof(double)), "cudaMalloc(d_y)");
    warn_on_cuda_error(cudaMalloc(&d_m, ROWS * COLUMNS * sizeof(double)), "cudaMalloc(d_m)");

    // Copy vectors from host memory to device memory.
    // y and m are row-pointer tables; the contiguous matrix data starts at
    // y[0] / m[0], so that is what has to be copied.
    // y[i][j] == d_y[COLUMNS*i + j]
    warn_on_cuda_error(cudaMemcpy(d_y, y[0], ROWS * COLUMNS * sizeof(double), cudaMemcpyHostToDevice), "cudaMemcpy(d_y)");
    warn_on_cuda_error(cudaMemcpy(d_m, m[0], ROWS * COLUMNS * sizeof(double), cudaMemcpyHostToDevice), "cudaMemcpy(d_m)");

    // initialize iteration counter
    int iter = 0;
    // Denominator of the Gaussian exponent; computed in int, as h is an int.
    const int two_h_squared = 2*(h*h);

    /** iterate until convergence **/
    double error = norm(m, ROWS, COLUMNS);
    while (error > opt->epsilon) {
        iter = iter +1;

        // find pairwise distance matrix (inside radius)
        // [I, D] = rangesearch(x,y,h);
        for (int i=0; i<ROWS; i++){
            for (int j=0; j<ROWS; j++){
                double dist = calculateDistance(y[i],x[j]);
                // keep only the pairs closer than h; everything else is 0
                W[i][j] = (dist < h) ? dist : 0;
            }
        }

        // Turn the kept distances into Gaussian weights and sum each row.
        for (int i=0; i<ROWS; i++){
            double sum =0;
            for (int j=0; j < ROWS; j++){
                if (W[i][j] != 0){
                    // weight = exp(-dist^2 / (2 h^2))
                    double squared = W[i][j]*W[i][j];
                    double exponent = ((-1)*squared)/two_h_squared;
                    W[i][j] = exp(exponent);
                }
                // make sure diagonal elements are 1
                // NOTE(review): this adds 1 to whatever the diagonal holds;
                // the result is exactly 1 only when the distance was 0 (or
                // not below h). Confirm against the reference implementation.
                if (i==j){
                    W[i][j] = W[i][j] +1;
                }
                // calculate sum(W,2)
                sum = sum + W[i][j];
            }
            l[i] = sum;
        }

        // new y = (W * x) with every row divided by the row sum of W
        multiply(W, x, y_new);
        for (int i=0; i<ROWS; i++){
            for (int j=0; j<COLUMNS; j++){
                y_new[i][j] = y_new[i][j] / l[i];
            }
        }

        // calculate mean-shift vector and update y
        for (int i=0; i<ROWS; i++){
            for (int j=0; j<COLUMNS; j++){
                m[i][j] = y_new[i][j] - y[i][j];
                y[i][j] = y_new[i][j];
            }
        }

        error = norm(m, ROWS, COLUMNS);
        printf("Iteration n. %d, error %f \n", iter, error);
        // TODO maybe keep y for live display later?
    }

    // Release device and host buffers (cudaFree accepts a null pointer).
    warn_on_cuda_error(cudaFree(d_W), "cudaFree(d_W)");
    warn_on_cuda_error(cudaFree(d_I), "cudaFree(d_I)");
    warn_on_cuda_error(cudaFree(d_y_new), "cudaFree(d_y_new)");
    warn_on_cuda_error(cudaFree(d_y), "cudaFree(d_y)");
    warn_on_cuda_error(cudaFree(d_m), "cudaFree(d_m)");

    free(l);
    free_2d_double(y_new);
    free_2d_double(W);
    free_2d_double(m);
    free_2d_double(y);
}
|||
|
|||
// allocates a 2d array in continuous memory positions |
|||
/*
 * Allocates a rows x cols matrix of doubles in contiguous memory.
 *
 * The values live in one block of rows*cols doubles; the returned table
 * holds one pointer per row into that block, so array[i][j] is the same
 * element as array[0][cols*i + j]. The contents are uninitialized.
 *
 * Returns NULL when rows or cols is not positive or when memory cannot be
 * allocated. To release the matrix, free array[0] (the data block) and then
 * the table itself.
 */
double **alloc_2d_double(int rows, int cols) {
    if (rows <= 0 || cols <= 0)
        return NULL;

    // Sizes are computed in size_t so rows*cols cannot overflow an int.
    double *data = (double *)malloc((size_t)rows * (size_t)cols * sizeof(double));
    if (data == NULL)
        return NULL;

    double **array = (double **)malloc((size_t)rows * sizeof(double*));
    if (array == NULL) {
        free(data);
        return NULL;
    }

    for (int i=0; i<rows; i++)
        array[i] = data + (size_t)cols * (size_t)i;
    return array;
}
|||
|
|||
// copy the values of a 2d double array to another |
|||
/*
 * Copies every element of the rows x cols matrix a into the matching
 * position of b. Both matrices are addressed through row-pointer tables.
 * Returns b so the call can be used in an assignment.
 */
double **duplicate(double **a, double **b, int rows, int cols){
    int r = 0;
    while (r < rows){
        int c = 0;
        while (c < cols){
            *(b[r] + c) = *(a[r] + c);
            ++c;
        }
        ++r;
    }
    return b;
}
|||
|
|||
// TODO check why there's is a difference in the norm calculate in matlab |
|||
// TODO check why there's is a difference in the norm calculate in matlab
/*
 * Returns the square root of the sum of the squares of all rows x cols
 * elements of m (0 when the matrix has no elements).
 */
double norm(double ** m, int rows, int cols){
    double total = 0;
    for (int r = 0; r < rows; ++r) {
        for (int c = 0; c < cols; ++c) {
            double value = m[r][c];
            total += value * value;
        }
    }
    return sqrt(total);
}
|||
|
|||
/*
 * Euclidean distance between y and x, taken over their first COLUMNS
 * components.
 */
double calculateDistance(double *y, double *x){
    double squared_total = 0;
    int k = 0;
    while (k < COLUMNS){
        double delta = y[k] - x[k];
        squared_total += delta * delta;
        ++k;
    }
    return sqrt(squared_total);
}
|||
|
|||
/*
 * Matrix product output = matrix1 * matrix2, where matrix1 is read as
 * ROWS x ROWS and matrix2 and output as ROWS x COLUMNS. Every element of
 * output is reset to 0 before its products are accumulated into it.
 */
void multiply(double ** matrix1, double ** matrix2, double ** output){
    for (int r = 0; r < ROWS; ++r){
        for (int c = 0; c < COLUMNS; ++c){
            output[r][c] = 0;
            int inner = 0;
            while (inner < ROWS){
                output[r][c] += matrix1[r][inner] * matrix2[inner][c];
                ++inner;
            }
        }
    }
}
|||
|
|||
/*
 * Prints the matrix to stdout one column per output line: line c lists
 * array[0][c], array[1][c], ... array[rows-1][c], each formatted with "%f "
 * and followed by a newline.
 */
void print_matrix(double ** array, int rows, int cols){
    int c = 0;
    while (c < cols){
        for (int r = 0; r < rows; ++r){
            printf("%f ", array[r][c]);
        }
        printf("\n");
        ++c;
    }
}
|||
|
|||
// Unfinished CUDA kernel version of the iteration loop in meanshift(); the
// body is a near copy of that host loop.
//
//   W       : pointer to doubles, used below as the pairwise weight matrix
//   epsilon : convergence threshold for the loop condition
//
// NOTE(review): as written this does not look compilable. It uses y, x, h, l
// and m, none of which are parameters or locals of this block; it compares
// the identifier `norm` (a host function in this file) with epsilon; it
// indexes the double* parameter W as W[i][j]; and it calls the host helpers
// calculateDistance, alloc_2d_double and multiply. Confirm the intended
// signature and data layout before building on it.
__global__ void iteration (double* W, double epsilon){ |
|||
// TODO check if they also need cudamalloc |
|||
// todo find how to keep counter |
|||
// NOTE(review): iter is incremented and printed below without ever being
// initialized.
int iter; |
|||
// NOTE(review): i and j are computed from the same expression and are then
// shadowed by the loop variables declared inside the loops below.
int i = blockDim.x * blockIdx.x + threadIdx.x; |
|||
int j = blockDim.x * blockIdx.x + threadIdx.x; |
|||
while (norm > epsilon){ |
|||
// TODO ITERATION |
|||
iter = iter +1; |
|||
// find pairwise distance matrix (inside radius) |
|||
/** allocate memory for inside iteration arrays **/ |
|||
// TODO ALLOCATE MEMORY BEFORE CALLING KERNEL |
|||
// double ** W = alloc_2d_double(ROWS, ROWS); |
|||
// double * l = malloc(ROWS * sizeof(double)); |
|||
// [I, D] = rangesearch(x,y,h); |
|||
for (int i=0; i<ROWS; i++){ |
|||
for (int j=0; j<ROWS; j++){ |
|||
// TODO REFACTOR CALCULATE DISTANCE |
|||
double dist = calculateDistance(y[i],x[j]); |
|||
|
|||
// 2sparse matrix |
|||
// Distances below h are kept, all others are stored as 0.
if (dist < h){ |
|||
W[i][j] = dist; |
|||
//printf("%f \n", W[i][j]); |
|||
}else{ |
|||
W[i][j] = 0; |
|||
} |
|||
} |
|||
} |
|||
|
|||
|
|||
// for each element of W (x) do x^2 |
|||
// size of W is [600 600] |
|||
// W is a sparse matrix -> apply to non-zero elements |
|||
for (int i=0; i<ROWS; i++){ |
|||
double sum =0; |
|||
for (int j=0; j < ROWS; j++){ |
|||
if (W[i][j] != 0){ |
|||
W[i][j] = W[i][j]*W[i][j]; |
|||
// compute kernel matrix |
|||
// apply function to non zero elements of a sparse matrix |
|||
// Non-zero entries become exp(-dist^2 / (2*h*h)).
double pow = ((-1)*(W[i][j]))/(2*(h*h)); |
|||
W[i][j] = exp(pow); |
|||
} |
|||
// make sure diagonal elements are 1 |
|||
// NOTE(review): this adds 1 to the current diagonal value, so the result
// is 1 only if that value was 0.
if (i==j){ |
|||
W[i][j] = W[i][j] +1; |
|||
} |
|||
// calculate sum(W,2) |
|||
sum = sum + W[i][j]; |
|||
} |
|||
/** l array is correct**/ |
|||
l[i] = sum; |
|||
// printf("l[%d] : %f \n", i, l[i]); |
|||
} |
|||
/** W is correct**/ |
|||
//print_matrix(W, ROWS, ROWS); |
|||
|
|||
|
|||
// create new y vector |
|||
// NOTE(review): a new matrix is requested on every pass of the while loop
// and is not freed anywhere in this block.
double** y_new = alloc_2d_double(ROWS, COLUMNS); |
|||
|
|||
multiply(W, x, y_new); |
|||
/** y_new is CORRECT **/ |
|||
// print_matrix(y_new, ROWS, COLUMNS); |
|||
// divide element-wise |
|||
for (int i=0; i<ROWS; i++){ |
|||
for (int j=0; j<COLUMNS; j++){ |
|||
y_new[i][j] = y_new[i][j] / l[i]; |
|||
} |
|||
} |
|||
|
|||
// calculate mean-shift vector |
|||
for (int i=0; i<ROWS; i++){ |
|||
for (int j=0; j<COLUMNS; j++){ |
|||
m[i][j] = y_new[i][j] - y[i][j]; |
|||
|
|||
// update y |
|||
y[i][j] = y_new[i][j]; |
|||
} |
|||
} |
|||
|
|||
printf("Iteration n. %d, error %f \n", iter, norm(m, ROWS, COLUMNS)); |
|||
// TODO maybe keep y for live display later? |
|||
} |
|||
} |
Loading…
Reference in new issue