diff --git a/README.md b/README.md
index fb31dc3..f370845 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
 
 This repository provides a serial implementation of the algorithm in C language, as well as the parallel equivalent in CUDA. The project was undertaken as part of the "Parallel and distributed systems" course of AUTH university.
 
-A [Gaussian] kernel was used for the weighting function. The code was tested for different data sets and information regarding the execution time and correctness were extracted. In addition, two versions of the parallel algorithm was tested and compared, with and without the usage of shared memory respectively.
+A [Gaussian] kernel was used for the weighting function. The code was tested for different data sets and information regarding the execution time and correctness were extracted. In addition, two versions of the parallel algorithm were tested and compared, with and without the usage of shared memory respectively.
 
 ## Compilation
 
diff --git a/serial.c b/serial.c
index 2cb5705..faafbc2 100755
--- a/serial.c
+++ b/serial.c
@@ -30,8 +30,8 @@ int main(int argc, char **argv){
     for (int i=0; i<ROWS; i++){
         int out = fread(vectors[i], sizeof(double), COLUMNS, f);
     }
-    //printf("test : %f \n", vectors[0][0]);
-    //printf("test : %f \n", vectors[ROWS-1][COLUMNS-1]);
+
+    save_matrix(vectors, 0);
 
     // initializing file that will contain the labels (train)
     f = fopen(L, "rb");
@@ -49,11 +49,11 @@ int main(int argc, char **argv){
 
     // MEAN SHIFT OPTIONS
     int h = 1;
-    struct parameters params;
+    parameters params;
     params.epsilon = 0.0001;
     params.verbose = false;
     params.display = false;
-    struct parameters *opt;
+    parameters *opt;
     opt = &params;
 
     // tic
diff --git a/serialDeclarations.c b/serialDeclarations.c
index 3effbbd..4947958 100644
--- a/serialDeclarations.c
+++ b/serialDeclarations.c
@@ -2,22 +2,23 @@
 #include <stdlib.h>
 #include <math.h>
 #include <float.h>
+#include <string.h>
 
 #include "serialDeclarations.h"
 
-void meanshift(double **x, int h, struct parameters *opt){
+void meanshift(double **originalPoints, int h, parameters *opt){
 
     double **y;
     y = alloc_2d_double(ROWS, COLUMNS);
-    y = duplicate(x, y, ROWS, COLUMNS);
+    y = duplicate(originalPoints, y, ROWS, COLUMNS);
 
-    // mean shift vectors
-    double **m;
-    m = alloc_2d_double(ROWS, COLUMNS);
-    // initialize elements of m to inf
+    // mean shift vector
+    double **meanShiftVector;
+    meanShiftVector = alloc_2d_double(ROWS, COLUMNS);
+    // initialize elements of meanShiftVector to inf
     for (int i=0;i<ROWS;i++){
         for (int j=0;j<COLUMNS;j++){
-            m[i][j] = DBL_MAX;
+            meanShiftVector[i][j] = DBL_MAX;
         }
     }
 
@@ -26,122 +27,86 @@ void meanshift(double **x, int h, struct parameters *opt){
 
     // printf("%f \n", opt->epsilon);
 
-    double ** W = alloc_2d_double(ROWS, ROWS);
-    double * l = malloc(ROWS * sizeof(double));
+    double ** kernelMatrix = alloc_2d_double(ROWS, ROWS);
+    double *denominator = malloc(ROWS * sizeof(double));
 
     /** iterate until convergence **/
     // printf("norm : %f \n", norm(m, ROWS, COLUMNS));
-
-    while (norm(m, ROWS, COLUMNS) > opt->epsilon) {
+    while (norm(meanShiftVector, ROWS, COLUMNS) > opt->epsilon) {
         iter = iter +1;
         // find pairwise distance matrix (inside radius)
-        /** allocate memory for inside iteration arrays **/
-
         // [I, D] = rangesearch(x,y,h);
         for (int i=0; i<ROWS; i++){
+            double sum =0;
             for (int j=0; j<ROWS; j++){
-                double dist = calculateDistance(y[i],x[j]);
-
-                // 2sparse matrix
-                if (dist < h){
-                    W[i][j] = dist;
-                    //printf("%f \n", W[i][j]);
-                }else{
-                    W[i][j] = 0;
-                }
-            }
-        }
-
+                double dist = calculateDistance(y[i],originalPoints[j]);
 
-        // for each element of W (x) do x^2
-        // size of W is [600 600]
-        // W is a sparse matrix -> apply to non-zero elements
-        for (int i=0; i<ROWS; i++){
-            double sum =0;
-            for (int j=0; j < ROWS; j++){
-                if (W[i][j] != 0){
-                    W[i][j] = W[i][j]*W[i][j];
+                if (i == j){
+                    kernelMatrix[i][j] = 1;
+                } else if (dist < h*h){
+                    kernelMatrix[i][j] = dist * dist;
                     // compute kernel matrix
-                    // apply function to non zero elements of a sparse matrix
-                    double pow = ((-1)*(W[i][j]))/(2*(h*h));
-                    W[i][j] = exp(pow);
+                    double pow = ((-1)*(kernelMatrix[i][j]))/(2*(h*h));
+                    kernelMatrix[i][j] = exp(pow);
+                } else {
+                    kernelMatrix[i][j] = 0;
                 }
-                // make sure diagonal elements are 1
-                if (i==j){
-                    W[i][j] = W[i][j] +1;
-                }
-                // calculate sum(W,2)
-                sum = sum + W[i][j];
+                sum = sum + kernelMatrix[i][j];
             }
-            /** l array is correct**/
-            l[i] = sum;
-            // printf("l[%d] : %f \n", i, l[i]);
+            denominator[i] = sum;
         }
-        /** W is correct**/
-        //print_matrix(W, ROWS, ROWS);
-
 
         // create new y vector
         double** y_new = alloc_2d_double(ROWS, COLUMNS);
 
-        multiply(W, x, y_new);
-        /** y_new is CORRECT **/
-        // print_matrix(y_new, ROWS, COLUMNS);
+        multiply(kernelMatrix, originalPoints, y_new);
         // divide element-wise
         for (int i=0; i<ROWS; i++){
             for (int j=0; j<COLUMNS; j++){
-                y_new[i][j] = y_new[i][j] / l[i];
+                y_new[i][j] = y_new[i][j] / denominator[i];
                 // calculate mean-shift vector
-                m[i][j] = y_new[i][j] - y[i][j];
+                meanShiftVector[i][j] = y_new[i][j] - y[i][j];
                 // update y
                 y[i][j] = y_new[i][j];
             }
         }
 
+        save_matrix(y, iter);
 
-        printf("Iteration n. %d, error %f \n", iter, norm(m, ROWS, COLUMNS));
+        printf("Iteration n. %d, error %f \n", iter, norm(meanShiftVector, ROWS, COLUMNS));
         // TODO maybe keep y for live display later?
-    };
-
-
-
-}
-
-// allocates a 2d array in continuous memory positions
-double **alloc_2d_double(int rows, int cols) {
-    double *data = (double *)malloc(rows*cols*sizeof(double));
-    double **array= (double **)malloc(rows*sizeof(double*));
-    for (int i=0; i<rows; i++)
-        array[i] = &(data[cols*i]);
-    return array;
-}
-
-// copy the values of a 2d double array to another
-double **duplicate(double **a, double **b, int rows, int cols){
-    for (int i=0;i<rows;i++){
-        for (int j=0;j<cols;j++){
-            b[i][j] = a[i][j];
-        }
     }
-    return b;
 }
 
 // TODO check why there's is a difference in the norm calculate in matlab
-double norm(double ** m, int rows, int cols){
-    double sum=0, a=0;
-    for (int i = 0; i < rows; i++) {
-        for (int j = 0; j < cols; j++) {
-            a = m[i][j] * m[i][j];
-            sum = sum + a;
+double norm(double **matrix, int rows, int cols){
+    double sum=0, tempMul=0;
+    for (int i=0; i<rows; i++) {
+        for (int j=0; j<cols; j++) {
+            tempMul = matrix[i][j] * matrix[i][j];
+            sum = sum + tempMul;
         }
     }
     double norm = sqrt(sum);
     return norm;
 }
 
+void multiply(double **matrix1, double **matrix2, double **output){
+    // W dims are ROWS ROWS and x dims are ROWS COLUMNS
+
+    for (int i=0; i<ROWS; i++){
+        for (int j=0; j<COLUMNS; j++){
+            output[i][j] = 0;
+            for (int k=0; k<ROWS; k++){
+                output[i][j] += matrix1[i][k] * matrix2[k][j];
+            }
+        }
+    }
+}
+
 double calculateDistance(double *y, double *x){
     double sum = 0, dif;
-    for (int i=0;i<COLUMNS;i++){
+    for (int i=0; i<COLUMNS; i++){
         dif = y[i]-x[i];
         sum += dif * dif;
     }
@@ -149,25 +114,46 @@ double calculateDistance(double *y, double *x){
     return distance;
 }
 
-void multiply(double ** matrix1, double ** matrix2, double ** output){
-    // W dims are ROWS ROWS and x dims are ROWS COLUMNS
+// allocates a 2d array in continuous memory positions
+double **alloc_2d_double(int rows, int cols) {
+    double *data = (double *) malloc(rows*cols*sizeof(double));
+    double **array = (double **) malloc(rows*sizeof(double*));
+    for (int i=0; i<rows; i++)
+        array[i] = &(data[cols*i]);
+    return array;
+}
 
-    int i, j, k;
-    for (i=0; i<ROWS; i++){
-        for (j=0; j<COLUMNS; j++){
-            output[i][j] = 0;
-            for (k=0; k<ROWS; k++){
-                output[i][j] += matrix1[i][k] * matrix2[k][j];
-            }
+// copy the values of a 2d double array to another
+double **duplicate(double **a, double **b, int rows, int cols){
+    for (int i=0; i<rows; i++){
+        for (int j=0; j<cols; j++){
+            b[i][j] = a[i][j];
         }
     }
+    return b;
 }
 
-void print_matrix(double ** array, int rows, int cols){
+void print_matrix(double **array, int rows, int cols){
     for (int i=0; i<cols; i++){
         for (int j=0; j<rows; j++){
             printf("%f ", array[j][i]);
         }
         printf("\n");
     }
+}
+
+void save_matrix(double **matrix,int iteration){
+    char filename[18];
+    snprintf(filename, sizeof(filename), "%s%d", "output/output_", iteration);
+    FILE *iterOutput;
+    iterOutput = fopen(filename, "w");
+    for (int rows=0; rows<ROWS; ++rows){
+        for (int cols=0; cols<COLUMNS; ++cols){
+            fprintf(iterOutput, "%f", matrix[rows][cols]);
+            if (cols != COLUMNS - 1){
+                fprintf(iterOutput, ",");
+            }
+        }
+        fprintf(iterOutput, "\n");
+    }
 }
\ No newline at end of file
diff --git a/serialDeclarations.h b/serialDeclarations.h
index 8e919aa..a43679e 100644
--- a/serialDeclarations.h
+++ b/serialDeclarations.h
@@ -8,18 +8,19 @@
 #define COLUMNS     2
 #define ROWS        600
 
-struct parameters {
+typedef struct parameters {
     double epsilon;
     bool verbose;
     bool display;
-};
+} parameters;
 
-double **alloc_2d_double(int rows, int cols);
-double **duplicate(double **a, double **b, int rows, int cols);
 void meanshift(double **x, int h, struct parameters *opt);
 double norm(double ** m, int rows, int cols);
 void multiply(double ** matrix1, double ** matrix2, double ** output);
 double calculateDistance(double *, double *);
+double **alloc_2d_double(int rows, int cols);
+double **duplicate(double **a, double **b, int rows, int cols);
 void print_matrix(double ** array, int rows, int cols);
+void save_matrix(double **matrix,int iteration);
 
 #endif //SERIAL_DECLARATIONS_H
\ No newline at end of file