diff --git a/mean-shift.cu b/mean-shift.cu index a5bb681..a14d30a 100644 --- a/mean-shift.cu +++ b/mean-shift.cu @@ -113,7 +113,25 @@ void meanshift(double **x, int h, struct parameters *opt){ /** iterate until convergence **/ // printf("norm : %f \n", norm(m, ROWS, COLUMNS)); + /** allocate memory **/ + double ** W = alloc_2d_double(ROWS, ROWS); + double * l = malloc(ROWS * sizeof(double)); + double * d_W; + cudaMalloc(&d_W, ROWS * ROWS * sizeof(double)); + double * d_I; + cudaMalloc(&d_I, ROWS * sizeof(double)); + double * d_y_new; + cudaMalloc(&d_y_new, ROWS * COLUMNS * sizeof(double)); + + double * d_y; + cudaMalloc(&d_y, ROWS * COLUMNS * sizeof(double)); + double * d_m; + cudaMalloc(&d_m, ROWS * COLUMNS * sizeof(double)); + + //Copy vectors from host memory to device memory + cudaMemcpy(d_y, y, ROWS * COLUMNS * sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(d_m, m, ROWS * COLUMNS * sizeof(double), cudaMemcpyHostToDevice); while (norm(m, ROWS, COLUMNS) > opt->epsilon) { @@ -262,4 +280,88 @@ void print_matrix(double ** array, int rows, int cols){ } } -__global__ void \ No newline at end of file +__global__ void iteration (double norm, double epsilon){ + // TODO check if they also need cudamalloc + int iter; + int i = blockDim.x * blockIdx.x + threadIdx.x; + int j = blockDim.x * blockIdx.x + threadIdx.x; + while (norm > epsilon){ + // TODO ITERATION + iter = iter +1; + // find pairwise distance matrix (inside radius) + /** allocate memory for inside iteration arrays **/ + // TODO ALLOCATE MEMORY BEFORE CALLING KERNEL +// double ** W = alloc_2d_double(ROWS, ROWS); +// double * l = malloc(ROWS * sizeof(double)); + // [I, D] = rangesearch(x,y,h); + for (int i=0; i apply to non-zero elements + for (int i=0; i