diff --git a/data/32 b/data/32
new file mode 100644
index 0000000..a1d7033
Binary files /dev/null and b/data/32 differ
diff --git a/data/run_helper.txt b/data/run_helper.txt
index 7e27a49..ff1679f 100644
--- a/data/run_helper.txt
+++ b/data/run_helper.txt
@@ -43,4 +43,17 @@ Dataset s4 {
 	122 iterations with epsilon = 0.0001 and h = 30000
 	110 iterations with epsilon = 0.01 and h = 30000
 	108 iterations with epsilon = 1 and h = 31000
+}
+
+Dataset 32 {
+	int NUMBER_OF_POINTS = 1024;
+	int DIMENSIONS = 32;
+	char* POINTS_FILENAME = "../data/32";
+	
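+	A good deviation is 10-20 (the h values of the runs recorded below; the previously noted 30000-35000 appears to be copied from dataset s4 -- TODO confirm)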
+	For Processing script:
+		not applicable
+
+	23 iterations with epsilon = 0.0001 and h = 20
+	44 iterations with epsilon = 0.01 and h = 10
 }
\ No newline at end of file
diff --git a/mean_shift_cuda/Makefile b/mean_shift_cuda/Makefile
index 6a10a8f..f7ff628 100644
--- a/mean_shift_cuda/Makefile
+++ b/mean_shift_cuda/Makefile
@@ -5,7 +5,7 @@ SHELL := /bin/bash
 
 CC = nvcc
 HOST_COMPILER = -ccbin gcc
-CUDA_FLAGS = -arch=sm_21 -Wno-deprecated-gpu-targets 
+CUDA_FLAGS = -arch=sm_21 -Wno-deprecated-gpu-targets -lcublas
 C_FLAGS = -lm -O3 -I.
 
 COMPILE_FLAGS = $(HOST_COMPILER) -x cu $(CUDA_FLAGS) -dc $(C_FLAGS)
diff --git a/mean_shift_cuda/meanshift.cu b/mean_shift_cuda/meanshift.cu
index fbd5260..f5324dc 100644
--- a/mean_shift_cuda/meanshift.cu
+++ b/mean_shift_cuda/meanshift.cu
@@ -16,7 +16,7 @@ struct timeval startwtime, endwtime;
 double seq_time;
 
 int main(int argc, char **argv){
-    int iterations;
+    int iterations = 0;
     double **vectors, **shifted_points;
     char *labels;
 
diff --git a/mean_shift_cuda/meanshift_gpu_utils.cu b/mean_shift_cuda/meanshift_gpu_utils.cu
index 1bc88a0..ba886ee 100644
--- a/mean_shift_cuda/meanshift_gpu_utils.cu
+++ b/mean_shift_cuda/meanshift_gpu_utils.cu
@@ -5,6 +5,8 @@
 #include <string.h>
 #include <sys/time.h>
 
+#include <cublas_v2.h>
+
 #include "meanshift_utils.h"
 #include "meanshift_gpu_utils.h"
 
@@ -127,13 +129,18 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
         save_matrix((*shifted_points), iteration);
     }
 
-    // calculates norm of the new mean shift vector
-    double current_norm = norm(mean_shift_vector, NUMBER_OF_POINTS, DIMENSIONS);
+    // calculates the Euclidean norm of the new mean shift vector on the GPU using the cuBLAS function cublasDnrm2
+    double current_norm = 0;
+    cublasHandle_t handle;
+    cublasCreate(&handle);
+    cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements, 1, &current_norm);
+    cublasDestroy(handle);
+
     if (params.verbose){
         printf("Iteration n. %d, error\t%f \n", iteration, current_norm);
     }
 
-    /** iterates until convergence **/
+    // iterates until convergence
     if (current_norm > opt->epsilon) {
         ++iteration;
         meanshift(original_points, shifted_points, deviation, opt);
@@ -244,7 +251,6 @@ void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points,
 }
 
 void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator){
-    int size;
     static bool first_iter = true;
     // gets max block size supported from the device
     static int requested_block_size = device_properties.maxThreadsPerBlock;
diff --git a/mean_shift_cuda/meanshift_kernels.cu b/mean_shift_cuda/meanshift_kernels.cu
index 319a6b2..04ff883 100644
--- a/mean_shift_cuda/meanshift_kernels.cu
+++ b/mean_shift_cuda/meanshift_kernels.cu
@@ -78,14 +78,4 @@ __global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix){
          cell_value += kernel_matrix.elements[row * kernel_matrix.width + column];
     }
     denominator.elements[row] = cell_value;
-}
-
-//__global__ double calcNorm(Matrix mean_shift_vector){
-//    float sum =0;
-//    for (int k=0; k< patchSize; k++){
-//        for (int l=0; l<patchSize; l++){
-//            sum+=(fNi(k,l)-fNj(k,l))*(fNi(k,l)-fNj(k,l))*H(k,l);
-//        }
-//    }
-//    return sum;
-//}
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/mean_shift_cuda/meanshift_utils.cu b/mean_shift_cuda/meanshift_utils.cu
index 60b6cf7..6cef7dd 100644
--- a/mean_shift_cuda/meanshift_utils.cu
+++ b/mean_shift_cuda/meanshift_utils.cu
@@ -123,19 +123,6 @@ void init(double ***vectors, char **labels){
     }
 }
 
-// TODO check why there's is a difference in the norm calculate in matlab
-double norm(double **matrix, int rows, int cols){
-    double sum=0, temp_mul=0;
-    for (int i=0; i<rows; i++) {
-        for (int j=0; j<cols; j++) {
-            temp_mul = matrix[i][j] * matrix[i][j];
-            sum = sum + temp_mul;
-        }
-    }
-    double norm = sqrt(sum);
-    return norm;
-}
-
 double **alloc_double(int rows, int cols) {
     double *data = (double *) malloc(rows*cols*sizeof(double));
     double **array = (double **) malloc(rows*sizeof(double*));
diff --git a/mean_shift_cuda/meanshift_utils.h b/mean_shift_cuda/meanshift_utils.h
index ce6c0cd..7c822a2 100644
--- a/mean_shift_cuda/meanshift_utils.h
+++ b/mean_shift_cuda/meanshift_utils.h
@@ -16,9 +16,6 @@ void get_args(int argc, char **argv, parameters *params);
 //Function init reads the dataset and label arrays from the corresponding files.
 void init(double ***vectors, char **labels);
 
-//Function norm returns the second norm of matrix of dimensions rowsXcols.
-double norm(double **matrix, int rows, int cols);
-
 //Function alloc_double allocates rows*cols bytes of continuous memory.
 double **alloc_double(int rows, int cols);