diff --git a/cuda_helpers.cu b/cuda_helpers.cu deleted file mode 100644 index 06433d8..0000000 --- a/cuda_helpers.cu +++ /dev/null @@ -1,21 +0,0 @@ - -// Host code -int width = 64, height = 64; -float* devPtr; -size_t pitch; -cudaMallocPitch(&devPtr, &pitch, width * sizeof(float), height); -MyKernel<<<100, 512>>>(devPtr, pitch, width, height); - - -// Device code -__global__ voidMyKernel(float* devPtr, size_t pitch, int width, int height) { - for (int r = 0; r < height; ++r) { - float* row = (float*)((char*)devPtr + r * pitch); - for (int c = 0; c < width; ++c) { - float element = row[c]; - } - } -} - -Read more at: http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#ixzz54kTh80mZ -Follow us: @GPUComputing on Twitter | NVIDIA on Facebook \ No newline at end of file diff --git a/mean-shift.cu b/mean-shift.cu deleted file mode 100644 index 916c94a..0000000 --- a/mean-shift.cu +++ /dev/null @@ -1,369 +0,0 @@ -#include -#include -#include -#include -#include - -#define X "data/X.bin" -#define L "data/L.bin" -#define COLUMNS 2 -#define ROWS 600 - -struct parameters { - double epsilon; - bool verbose; - bool display; -}; - -double **alloc_2d_double(int rows, int cols); -double **duplicate(double **a, double **b, int rows, int cols); -void meanshift(double **x, int h, struct parameters *opt); -double norm(double ** m, int rows, int cols); -void multiply(double ** matrix1, double ** matrix2, double ** output); -double calculateDistance(double *, double *); -void print_matrix(double ** array, int rows, int cols); - - -struct timeval startwtime, endwtime; -double seq_time; - -int main(int argc, char **argv){ - -// if (argc<2){ -// printf("%s\n", "Specify the k"); -// return 1; -// } -// = atoi(argv[1]); // the k-parameter - - - FILE *f; -// f = fopen(X, "rb"); -// fseek(f, 0L, SEEK_END); -// long int pos = ftell(f); -// fclose(f); -// int elements = pos / sizeof(double); // number of total elements (points*dimension) -// int points = elements/COLUMNS; -// //printf("points : %d \n", points); - f = fopen(X, "rb"); - double ** vectors; - vectors = alloc_2d_double(ROWS, COLUMNS); - for (int i=0; i - // variables of type uint8 are stored as 1-byte (8-bit) unsigned integers - fseek(f, 0L, SEEK_END); - long int pos = ftell(f); - rewind(f); - //printf("position : %ld \n", pos); - int label_elements = pos/ sizeof(char); - char *labels = (char*)malloc(label_elements* sizeof(char)); - fseek(f, 0L, SEEK_SET); - int out = fread(labels, sizeof(char), label_elements, f); - fclose(f); - - // MEAN SHIFT OPTIONS - int h = 1; - struct parameters params; - params.epsilon = 0.0001; - params.verbose = false; - params.display = false; - struct parameters *opt; - opt = ¶ms; - - // tic - gettimeofday (&startwtime, NULL); - - meanshift(vectors, h, opt); - - // toc - gettimeofday (&endwtime, NULL); - seq_time = (double)((endwtime.tv_usec - startwtime.tv_usec)/1.0e6 + endwtime.tv_sec - startwtime.tv_sec); - printf("%s wall clock time = %f\n","Mean Shift", seq_time); - - //TODO write output points to file -> plot later - -} - -void meanshift(double **x, int h, struct parameters *opt){ - - double **y; - y = alloc_2d_double(ROWS, COLUMNS); - y = duplicate(x, y, ROWS, COLUMNS); - - // mean shift vectors - double **m; - m = alloc_2d_double(ROWS, COLUMNS); - // initialize elements of m to inf - for (int i=0;iepsilon); - - /** iterate until convergence **/ - // printf("norm : %f \n", norm(m, ROWS, COLUMNS)); - /** allocate memory **/ - double ** W = alloc_2d_double(ROWS, ROWS); - double * l = malloc(ROWS * sizeof(double)); - - double * d_W; - cudaMalloc(&d_W, ROWS * ROWS * sizeof(double)); - double * d_I; - cudaMalloc(&d_I, ROWS * sizeof(double)); - double * d_y_new; - cudaMalloc(&d_y_new, ROWS * COLUMNS * sizeof(double)); - - double * d_y; - cudaMalloc(&d_y, ROWS * COLUMNS * sizeof(double)); - double * d_m; - cudaMalloc(&d_m, ROWS * COLUMNS * sizeof(double)); - - //Copy vectors from host memory to device memory - cudaMemcpy(d_y, y, ROWS * COLUMNS * sizeof(double), cudaMemcpyHostToDevice); - // y[i][j] == d_y[COLUMNS*i + j] - cudaMemcpy(d_m, m, ROWS * COLUMNS * sizeof(double), cudaMemcpyHostToDevice); - - - while (norm(m, ROWS, COLUMNS) > opt->epsilon) { - iter = iter +1; - // find pairwise distance matrix (inside radius) - /** allocate memory for inside iteration arrays **/ - double ** W = alloc_2d_double(ROWS, ROWS); - double * l = malloc(ROWS * sizeof(double)); - // [I, D] = rangesearch(x,y,h); - for (int i=0; i apply to non-zero elements - for (int i=0; i epsilon){ - // TODO ITERATION - iter = iter +1; - // find pairwise distance matrix (inside radius) - /** allocate memory for inside iteration arrays **/ - // TODO ALLOCATE MEMORY BEFORE CALLING KERNEL -// double ** W = alloc_2d_double(ROWS, ROWS); -// double * l = malloc(ROWS * sizeof(double)); - // [I, D] = rangesearch(x,y,h); - for (int i=0; i apply to non-zero elements - for (int i=0; i