diff --git a/cuda_helpers.cu b/cuda_helpers.cu
new file mode 100644
index 0000000..06433d8
--- /dev/null
+++ b/cuda_helpers.cu
@@ -0,0 +1,26 @@
+// Pitched 2D allocation example, adapted from the CUDA C Programming Guide.
+
+// Device code
+__global__ void MyKernel(float* devPtr, size_t pitch, int width, int height) {
+    // Rows are pitch-aligned: step by the byte pitch, not by width*sizeof(float).
+    for (int r = 0; r < height; ++r) {
+        float* row = (float*)((char*)devPtr + r * pitch);
+        for (int c = 0; c < width; ++c) {
+            float element = row[c];
+        }
+    }
+}
+
+// Host code
+void pitchedAllocExample() {
+    int width = 64, height = 64;
+    float* devPtr;
+    size_t pitch;
+    // pitch is returned in bytes and may exceed width*sizeof(float) for alignment.
+    cudaMallocPitch(&devPtr, &pitch, width * sizeof(float), height);
+    MyKernel<<<100, 512>>>(devPtr, pitch, width, height);
+    cudaFree(devPtr);
+}
+
+// Read more at: http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#ixzz54kTh80mZ
+// Follow us: @GPUComputing on Twitter | NVIDIA on Facebook
diff --git a/mean-shift.cu b/mean-shift.cu
index a14d30a..916c94a 100644
--- a/mean-shift.cu
+++ b/mean-shift.cu
@@ -131,6 +131,7 @@ void meanshift(double **x, int h, struct parameters *opt){
 
 	//Copy vectors from host memory to device memory
 	cudaMemcpy(d_y, y, ROWS * COLUMNS * sizeof(double), cudaMemcpyHostToDevice);
+	// y[i][j] == d_y[COLUMNS*i + j]
 	cudaMemcpy(d_m, m, ROWS * COLUMNS * sizeof(double), cudaMemcpyHostToDevice);
 
@@ -280,8 +281,9 @@ void print_matrix(double ** array, int rows, int cols){
 	}
 }
 
-__global__ void iteration (double norm, double epsilon){
+__global__ void iteration (double* W, double epsilon){
 	// TODO check if they also need cudamalloc
+	// todo find how to keep counter
 	int iter;
 	int i = blockDim.x * blockIdx.x + threadIdx.x;
 	int j = blockDim.x * blockIdx.x + threadIdx.x;