diff --git a/mean_shift_cuda/meanshift.cu b/mean_shift_cuda/meanshift.cu
index f5324dc..2ebae16 100644
--- a/mean_shift_cuda/meanshift.cu
+++ b/mean_shift_cuda/meanshift.cu
@@ -8,8 +8,8 @@
 int DEVIATION = 1;
 int NUMBER_OF_POINTS = 600;
 int DIMENSIONS = 2;
-char* POINTS_FILENAME = "../data/X.bin";
-char* LABELS_FILENAME = "../data/L.bin";
+const char *POINTS_FILENAME = "../data/X.bin";
+const char *LABELS_FILENAME = "../data/L.bin";
 parameters params;
 
 struct timeval startwtime, endwtime;
diff --git a/mean_shift_cuda/meanshift_gpu_utils.cu b/mean_shift_cuda/meanshift_gpu_utils.cu
index ba886ee..c92e78f 100644
--- a/mean_shift_cuda/meanshift_gpu_utils.cu
+++ b/mean_shift_cuda/meanshift_gpu_utils.cu
@@ -15,8 +15,8 @@ cudaDeviceProp device_properties;
 struct timeval start, end;
 double seq;
 
-//Based on https://stackoverflow.com/a/28113186
-//Pio psagmeno link https://www.cs.virginia.edu/~csadmin/wiki/index.php/CUDA_Support/Choosing_a_GPU
+//Based on:
+//          https://www.cs.virginia.edu/~csadmin/wiki/index.php/CUDA_Support/Choosing_a_GPU
 void set_GPU(){
     int devices_count = 0, max_multiprocessors = 0, max_device = 0;
 
@@ -56,7 +56,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
     int size = 0;
     static int iteration = 0;
     static double **kernel_matrix, **mean_shift_vector;
-    double **new_shift;
+    double **new_shift, current_norm = 0;
 
     // device variables
     static Matrix d_original_points, d_shifted_points, d_kernel_matrix, d_denominator,
@@ -78,7 +78,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
             }
         }
 
-        // allocates memory for other arrays needed
+        // allocates memory for kernel_matrix
         kernel_matrix = alloc_double(NUMBER_OF_POINTS, NUMBER_OF_POINTS);
 
         // tic
@@ -91,7 +91,6 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
         gettimeofday (&end, NULL);
         seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
 
-
 //        printf("%s wall clock time = %f\n","Device memory allocation", seq);
         // to create output data file
         printf("%f ", seq);
@@ -105,8 +104,6 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
     // calculates denominator
     calculate_denominator(d_kernel_matrix, d_denominator);
 
-
-
     // creates new y vector
     // allocates memory in every recursion
     new_shift = alloc_double(NUMBER_OF_POINTS, DIMENSIONS);
@@ -130,11 +127,20 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
     }
 
     // calculates norm of the new mean shift vector in GPU using "cuBlas" library function
-    double current_norm = 0;
     cublasHandle_t handle;
-    cublasCreate(&handle);
-    cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements, 1, &current_norm);
-    cublasDestroy(handle);
+    cublasStatus_t cublas_status = cublasCreate(&handle);
+    if (cublas_status != CUBLAS_STATUS_SUCCESS){
+        exit(cublas_status);
+    }
+    cublas_status = cublasDnrm2(handle, NUMBER_OF_POINTS * DIMENSIONS, d_mean_shift_vector.elements,
+        1, &current_norm);
+    if (cublas_status != CUBLAS_STATUS_SUCCESS){
+        exit(cublas_status);
+    }
+    cublas_status = cublasDestroy(handle);
+    if (cublas_status != CUBLAS_STATUS_SUCCESS){
+        exit(cublas_status);
+    }
 
     if (params.verbose){
         printf("Iteration n. %d, error\t%f \n", iteration, current_norm);
@@ -244,7 +250,6 @@ void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points,
     gettimeofday (&end, NULL);
     seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
 
-
 //    printf("%s wall clock time = %f\n","Copying from device to host", seq);
     // to create output data file
         printf("%f ", seq);
@@ -331,7 +336,6 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi
     gettimeofday (&end, NULL);
     seq = (double)((end.tv_usec - start.tv_usec)/1.0e6 + end.tv_sec - start.tv_sec);
 
-
 //    printf("%s wall clock time = %f\n","Copying from device to host", seq);
     // to create output data file
         printf("%f ", seq);
diff --git a/mean_shift_cuda/meanshift_gpu_utils.h b/mean_shift_cuda/meanshift_gpu_utils.h
index 48d466c..1c2cdf5 100644
--- a/mean_shift_cuda/meanshift_gpu_utils.h
+++ b/mean_shift_cuda/meanshift_gpu_utils.h
@@ -4,7 +4,7 @@
 #include "meanshift_kernels.h"
 
 //GPU error check snippet taken from:
-//https://stackoverflow.com/a/14038590
+//              https://stackoverflow.com/a/14038590
 #define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
 inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true){
    if (code != cudaSuccess){
@@ -17,37 +17,42 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
 extern int DEVIATION;
 extern int NUMBER_OF_POINTS;
 extern int DIMENSIONS;
-extern char* POINTS_FILENAME;
-extern char* LABELS_FILENAME;
-extern parameters params;
+extern const char* POINTS_FILENAME;
+extern const char* LABELS_FILENAME;
+extern Parameters params;
 extern cudaDeviceProp device_properties;
 
+//Function set_GPU enumerates the available GPU devices, selects the one with the most
+//multiprocessors and stores its properties in the global struct device_properties
 void set_GPU();
 
-//Function meanshift recursively shifts original points according to th
-//mean-shift algorithm saving the result to shiftedPoints. Struct opt has user
-//options, h is the desirable deviation.
+//Function meanshift recursively shifts original points according to the mean-shift algorithm saving
+//the result to shifted_points. Struct opt has user options, h is the desired deviation
 int meanshift(double **original_points, double ***shifted_points, int h
-    , parameters *opt);
+    , Parameters *opt);
 
+//Function init_device_memory allocates memory for necessary arrays in the device
 void init_device_memory(double **original_points, double **shifted_points,
-    Matrix *d_original_points, Matrix *d_shifted_points,
-    Matrix *d_kernel_matrix, Matrix *d_denominator, Matrix *d_new_shift);
+    Matrix *d_original_points, Matrix *d_shifted_points, Matrix *d_kernel_matrix,
+    Matrix *d_denominator, Matrix *d_new_shift);
 
+//Function calculate_kernel_matrix is a wrapper for the kernel call of the corresponding kernel
+//"calculate_kernel_matrix_kernel" that calculates the kernel matrix
 void calculate_kernel_matrix(Matrix d_shifted_points, Matrix d_original_points,
     Matrix d_kernel_matrix, double deviation, double ***kernel_matrix);
 
-//Function multiply allocates memory in GPU, sends the data and calls the 
-//multiply kernel function.
+//Function calculate_denominator is a wrapper for the launch of the kernel "denominator_kernel"
+//that calculates the denominator of the fraction used to find the shifted points
+void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator);
+
+//Function shift_points is a wrapper for the kernel call of the corresponding kernel
+//"shift_points_kernel" that shifts the positions of all points
 void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shifted_points,
     Matrix d_new_shift, Matrix d_denominator, Matrix d_mean_shift_vector, double **kernel_matrix,
     double **original_points, double ***new_shift, double ***mean_shift_vector);
 
+//Function free_device_memory frees device's previously allocated memory
 void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator,
     Matrix d_new_shift);
 
-//Function calculate_denominator allocates memory in GPU, sends the data and calls the
-//denominator kernel function.
-void calculate_denominator(Matrix d_kernel_matrix, Matrix d_denominator);
-
 #endif //SERIAL_GPU_UTILS_H
\ No newline at end of file
diff --git a/mean_shift_cuda/meanshift_kernels.cu b/mean_shift_cuda/meanshift_kernels.cu
index 04ff883..a20055d 100644
--- a/mean_shift_cuda/meanshift_kernels.cu
+++ b/mean_shift_cuda/meanshift_kernels.cu
@@ -16,7 +16,8 @@ __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix ori
     // calculate distance
     double sum = 0, dif;
     for (int i=0; i<dimensions; i++){
-        dif = shifted_points.elements[row * dimensions + i] - original_points.elements[col * dimensions + i];
+        dif = shifted_points.elements[row * dimensions + i]
+            - original_points.elements[col * dimensions + i];
         sum += dif * dif;
     }
     double distance = sqrt(sum);
@@ -34,8 +35,25 @@ __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix ori
     }
 }
 
-__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix, Matrix shifted_points,
-    Matrix new_shift, Matrix denominator, Matrix mean_shift_vector){
+__global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix){
+    // each thread sums one row of kernel_matrix into cell_value
+    // and stores the result as one element of denominator
+    double cell_value = 0;
+    int row = blockIdx.x * blockDim.x + threadIdx.x;
+
+    // threads whose row index is not below denominator.height have no work to do
+    if (row >= denominator.height){
+        return;
+    }
+
+    for (int column = 0; column < kernel_matrix.width; ++column){
+         cell_value += kernel_matrix.elements[row * kernel_matrix.width + column];
+    }
+    denominator.elements[row] = cell_value;
+}
+
+__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix,
+    Matrix shifted_points, Matrix new_shift, Matrix denominator, Matrix mean_shift_vector){
     // each thread computes one element of new_shift
     // by accumulating results into cell_value
     double cell_value = 0;
@@ -61,21 +79,4 @@ __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix
     mean_shift_vector.elements[row * new_shift.width + col] =
         new_shift.elements[row * new_shift.width + col] -
         shifted_points.elements[row * new_shift.width + col];
-}
-
-__global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix){
-    // each thread computes one element of denominator_kernel
-    // by accumulating results into cell_value
-    double cell_value = 0;
-    int row = blockIdx.x * blockDim.x + threadIdx.x;
-
-    // performs calculations only if thread's indexes are within matrix bounds
-    if (row >= denominator.height){
-        return;
-    }
-
-    for (int column = 0; column < kernel_matrix.width; ++column){
-         cell_value += kernel_matrix.elements[row * kernel_matrix.width + column];
-    }
-    denominator.elements[row] = cell_value;
 }
\ No newline at end of file
diff --git a/mean_shift_cuda/meanshift_kernels.h b/mean_shift_cuda/meanshift_kernels.h
index d33641e..0ff8070 100644
--- a/mean_shift_cuda/meanshift_kernels.h
+++ b/mean_shift_cuda/meanshift_kernels.h
@@ -1,19 +1,26 @@
 #ifndef SERIAL_KERNELS_H    /*    Include guard    */
 #define SERIAL_KERNELS_H
 
+/*      Structures     */
+
+//Matrix describes a height x width matrix stored as a flat, row-major array of doubles
 typedef struct {
     int width;
     int height;
     double *elements;
 } Matrix;
 
+//Kernel calculate_kernel_matrix_kernel calculates the current kernel matrix
 __global__ void calculate_kernel_matrix_kernel(Matrix shifted_points, Matrix original_points,
     double deviation, Matrix kernel_matrix);
 
-//Function multiply_kernel calculates the product of matrices 1 and 2 into output.
-__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix, Matrix shifted_points,
-    Matrix new_shift, Matrix denominator, Matrix mean_shift_vector);
-
+//Kernel denominator_kernel calculates the sum in the denominator of the fraction used to find new
+//(shifted) positions of the points
 __global__ void denominator_kernel(Matrix denominator, Matrix kernel_matrix);
 
+//Kernel shift_points_kernel shifts the positions of all points and calculates the new mean shift
+//vector according to the new point array
+__global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix,
+    Matrix shifted_points, Matrix new_shift, Matrix denominator, Matrix mean_shift_vector);
+
 #endif //SERIAL_KERNELS_H
\ No newline at end of file
diff --git a/mean_shift_cuda/meanshift_utils.h b/mean_shift_cuda/meanshift_utils.h
index 7c822a2..3be505a 100644
--- a/mean_shift_cuda/meanshift_utils.h
+++ b/mean_shift_cuda/meanshift_utils.h
@@ -3,30 +3,33 @@
 
 #include <stdbool.h>
 
-/*        Structs       */
+/*      Structures     */
+
+//Parameters groups the per-run options: epsilon plus the verbose and display flags
 typedef struct parameters {
     double epsilon;
     bool verbose;
     bool display;
-} parameters;
+} Parameters;
 
-//Function get_args parses command line arguments.
-void get_args(int argc, char **argv, parameters *params);
+//Function get_args parses command line arguments
+void get_args(int argc, char **argv, Parameters *params);
 
-//Function init reads the dataset and label arrays from the corresponding files.
+//Function init sets up the GPU for later use, gets its properties and reads the dataset and label
+//arrays from the corresponding files
 void init(double ***vectors, char **labels);
 
-//Function alloc_double allocates rows*cols bytes of continuous memory.
+//Function alloc_double allocates a rows x cols array of doubles in contiguous memory
 double **alloc_double(int rows, int cols);
 
-//Function duplicate copies the values of source array to dest array.
+//Function duplicate copies the values of source array to dest array
 void duplicate(double **source, int rows, int cols, double ***dest);
 
-//Function print_matrix prints array of dimensions rowsXcols to the console.
+//Function print_matrix prints array of dimensions <rows X cols> to the console
 void print_matrix(double **array, int rows, int cols);
 
-//Function save_matrix prints matrix in a csv file with path/filename
-//"output/output_iteration". If a file already exists new lines are concatenated.
+//Function save_matrix stores matrix in a csv file with path/filename "../output/output_iteration".
+//If the file already exists, new lines are appended to it
 void save_matrix(double **matrix, int iteration);
 
 #endif //SERIAL_UTILS_H
\ No newline at end of file