Browse Source

Clean-up

master
Apostolos Fanakis 6 years ago
parent
commit
d861d287e4
  1. 16
      README.md
  2. 41
      data/L.bin
  3. 21
      data/run_helper.txt
  4. 50
      mean_shift_cuda/32.txt
  5. 17
      mean_shift_cuda/meanshift.cu
  6. 49
      mean_shift_cuda/meanshift_utils.cu
  7. 8
      mean_shift_cuda/meanshift_utils.h
  8. 152
      mean_shift_cuda/s4_cuda.txt
  9. 8
      mean_shift_cuda_shared_mem/meanshift.cu
  10. 49
      mean_shift_cuda_shared_mem/meanshift_utils.cu
  11. 2
      mean_shift_cuda_shared_mem/meanshift_utils.h
  12. 152
      mean_shift_cuda_shared_mem/s4_shared.txt
  13. 26
      mean_shift_serial/32_serial.txt
  14. 131
      mean_shift_serial/s4_serial.txt
  15. 8
      mean_shift_serial/serial.c
  16. 116
      mean_shift_serial/serial.txt
  17. 34
      mean_shift_serial/serial_declarations.c
  18. 5
      mean_shift_serial/serial_declarations.h
  19. 42
      output/visualization/visualization.pde
  20. 50
      stats/32/32_runtime_loceye.txt
  21. 26
      stats/32/32_runtime_loceye_serial.txt
  22. 57
      stats/s1/s1_runtime_loceye.txt
  23. 33
      stats/s1/s1_runtime_loceye_serial.txt
  24. 57
      stats/s1/s1_runtime_shared_loceye.txt
  25. 194
      stats/s4/s4_runtime_loceye.txt
  26. 131
      stats/s4/s4_runtime_loceye_serial.txt
  27. 3
      stats/s4/s4_runtime_loceye_shared.txt
  28. 118
      stats/s4/s4_serial_diades_run.txt
  29. 118
      stats/s4/s4_serial_local_run.txt
  30. 33
      stats/serial_in_Matlab_and_C.txt
  31. 53
      testers/cuda_iteration_test.cu
  32. 51
      testers/iteration.c
  33. 38
      testers/prefactor_code.c
  34. 114
      testers/test_code.c

16
README.md

@ -1,4 +1,3 @@
# Mean-shift # Mean-shift
[Mean-shift] is a mathematical procedure, adopted in algorithms, designed in the 70's by Fukunaga and Hostetler. The algorithm is used for: [Mean-shift] is a mathematical procedure, adopted in algorithms, designed in the 70's by Fukunaga and Hostetler. The algorithm is used for:
@ -31,7 +30,7 @@ $ make
Run the code with the command: Run the code with the command:
```sh ```sh
$ ./meanshift h e N D Pd Pl $ ./meanshift h e N D Pd
``` ```
where: where:
@ -40,12 +39,17 @@ where:
3. **N** is the number of points 3. **N** is the number of points
4. **D** is the number of dimensions of each point 4. **D** is the number of dimensions of each point
5. **Pd** is the path of the dataset file 5. **Pd** is the path of the dataset file
6. **Pl** is the path of the labels file 6. **--verbose** | **-v** is an optional flag to enable execution information output
7. **--verbose** | **-v** is an optional flag to enable execution information output 7. **--output** | **-o** is an optional flag to enable points output in each iteration
8. **--output** | **-o** is an optional flag to enable points output in each iteration
Useful information regarding the appropriate ranges of these values is provided in the file "data/run_helper.txt".
Example:
```sh
$ ./meanshift 1 0.000001 600 2 ../data/X.bin -v -o
```
**Free Software, Hell Yeah!** When using the -o flag, a folder named "output" **must** be present within the root directory (at the same level as mean_shift_* folders).
[//]: # (Links) [//]: # (Links)

41
data/L.bin

@ -1,41 +0,0 @@



21
data/run_helper.txt

@ -4,12 +4,6 @@ Dataset X {
char* POINTS_FILENAME = "../data/X.bin"; char* POINTS_FILENAME = "../data/X.bin";
A good deviation is 1 A good deviation is 1
For Processing script:
float maxX = 17.124000;
float minX = 3.402000;
float maxY = 14.996000;
float minY = 3.178000;
12 iterations 12 iterations
} }
@ -19,12 +13,6 @@ Dataset s1 {
char* POINTS_FILENAME = "../data/s1"; char* POINTS_FILENAME = "../data/s1";
A good deviation is 30000 A good deviation is 30000
For Processing script:
float maxX = 961951;
float minX = 19835;
float maxY = 970756;
float minY = 51121;
~28 iterations ~28 iterations
} }
@ -34,12 +22,6 @@ Dataset s4 {
char* POINTS_FILENAME = "../data/s4"; char* POINTS_FILENAME = "../data/s4";
A good deviation is 30000-35000 A good deviation is 30000-35000
For Processing script:
float maxX = 932954;
float minX = 89604;
float maxY = 977215;
float minY = 35412;
122 iterations with epsilon = 0.0001 and h = 30000 122 iterations with epsilon = 0.0001 and h = 30000
110 iterations with epsilon = 0.01 and h = 30000 110 iterations with epsilon = 0.01 and h = 30000
108 iterations with epsilon = 1 and h = 31000 108 iterations with epsilon = 1 and h = 31000
@ -51,9 +33,6 @@ Dataset 32 {
char* POINTS_FILENAME = "../data/32"; char* POINTS_FILENAME = "../data/32";
A good deviation is 30000-35000 A good deviation is 30000-35000
For Processing script:
no use
23 iterations with epsilon = 0.0001 and h = 20 23 iterations with epsilon = 0.0001 and h = 20
44 iterations with epsilon = 0.01 and h = 10 44 iterations with epsilon = 0.01 and h = 10
} }

50
mean_shift_cuda/32.txt

@ -1,50 +0,0 @@
Device chosen is "GeForce GTX 480"
Device has 15 multi processors and compute capability 2.0
Max threads per block supported are 1024
Reading dataset and labels...
Done.
Device memory allocation wall clock time = 0.039595
calculate_kernel_matrix_kernel called with:
dimBlock.x = 30, dimBlock.y = 30
dimGrid.x = 35, dimGrid.y = 35
calculate_denominator called with:
dimBlock.x = 1024, dimBlock.y = 1
dimGrid.x = 1, dimGrid.y = 1
shift_points_kernel called with:
dimBlock.x = 28, dimBlock.y = 32
dimGrid.x = 37, dimGrid.y = 1
Recursion n. 0, error 638.769335
Recursion n. 1, error 86.996834
Recursion n. 2, error 540.383480
Recursion n. 3, error 130.879803
Recursion n. 4, error 126.467953
Recursion n. 5, error 256.415922
Recursion n. 6, error 6.383913
Recursion n. 7, error 1.206431
Recursion n. 8, error 0.373697
Recursion n. 9, error 0.190936
Recursion n. 10, error 0.107748
Recursion n. 11, error 0.061548
Recursion n. 12, error 0.035299
Recursion n. 13, error 0.020304
Recursion n. 14, error 0.011708
Recursion n. 15, error 0.006766
Recursion n. 16, error 0.003918
Recursion n. 17, error 0.002273
Recursion n. 18, error 0.001321
Recursion n. 19, error 0.000769
Recursion n. 20, error 0.000448
Recursion n. 21, error 0.000262
Recursion n. 22, error 0.000153
Recursion n. 23, error 0.000090
Copying between device and host wall clock time = 0.111358
Total number of recursions = 23
Mean Shift wall clock time = 1.279128

17
mean_shift_cuda/meanshift.cu

@ -9,7 +9,6 @@ int DEVIATION = 20;
int NUMBER_OF_POINTS = 1024; int NUMBER_OF_POINTS = 1024;
int DIMENSIONS = 32; int DIMENSIONS = 32;
const char *POINTS_FILENAME = "../data/32"; const char *POINTS_FILENAME = "../data/32";
const char *LABELS_FILENAME = "../data/L.bin";
parameters params; parameters params;
struct timeval startwtime, endwtime; struct timeval startwtime, endwtime;
@ -18,13 +17,14 @@ double seq_time;
int main(int argc, char **argv){ int main(int argc, char **argv){
int recursions = 0; int recursions = 0;
double **vectors, **shifted_points; double **vectors, **shifted_points;
char *labels;
//inits parameters
params.epsilon = 0.0001; params.epsilon = 0.0001;
params.verbose = true; params.verbose = true;
params.display = true; params.display = true;
//get_args(argc, argv, &params); //commented out while in development
init(&vectors, &labels); get_args(argc, argv, &params);
init(&vectors);
// tic // tic
gettimeofday (&startwtime, NULL); gettimeofday (&startwtime, NULL);
@ -34,12 +34,11 @@ int main(int argc, char **argv){
gettimeofday (&endwtime, NULL); gettimeofday (&endwtime, NULL);
seq_time = (double)((endwtime.tv_usec - startwtime.tv_usec)/1.0e6 + endwtime.tv_sec - startwtime.tv_sec); seq_time = (double)((endwtime.tv_usec - startwtime.tv_usec)/1.0e6 + endwtime.tv_sec - startwtime.tv_sec);
printf("\nTotal number of recursions = %d\n", recursions); printf("\nTotal number of recursions = %d\n", recursions);
printf("%s wall clock time = %f\n","Mean Shift", seq_time); printf("%s wall clock time = %f\n","Mean Shift", seq_time);
//free(vectors[0]); free(vectors[0]);
//free(vectors); free(vectors);
//free(shifted_points[0]); free(shifted_points[0]);
//free(shifted_points); free(shifted_points);
} }

49
mean_shift_cuda/meanshift_utils.cu

@ -10,14 +10,13 @@
#define OUTPUT_PREFIX "../output/output_" #define OUTPUT_PREFIX "../output/output_"
void get_args(int argc, char **argv, parameters *params){ void get_args(int argc, char **argv, parameters *params){
if (argc < 7) { if (argc < 6) {
printf("Usage: %s h e N D Pd Pl\nwhere:\n" printf("Usage: %s h e N D Pd Pl\nwhere:\n"
"\th is the variance\n" "\th is the variance\n"
"\te is the min distance, between two points, that is taken into account in computations\n" "\te is the min distance, between two points, that is taken into account in computations\n"
"\tN is the the number of points\n" "\tN is the the number of points\n"
"\tD is the number of dimensions of each point\n" "\tD is the number of dimensions of each point\n"
"\tPd is the path of the dataset file\n" "\tPd is the path of the dataset file\n"
"\tPl is the path of the labels file\n"
"\n\t--verbose | -v is an optional flag to enable execution information output" "\n\t--verbose | -v is an optional flag to enable execution information output"
"\n\t--output | -o is an optional flag to enable points output in each iteration", argv[0]); "\n\t--output | -o is an optional flag to enable points output in each iteration", argv[0]);
exit(1); exit(1);
@ -28,12 +27,11 @@ void get_args(int argc, char **argv, parameters *params){
NUMBER_OF_POINTS = atoi(argv[3]); NUMBER_OF_POINTS = atoi(argv[3]);
DIMENSIONS = atoi(argv[4]); DIMENSIONS = atoi(argv[4]);
POINTS_FILENAME = argv[5]; POINTS_FILENAME = argv[5];
LABELS_FILENAME = argv[6];
params->verbose = false; params->verbose = false;
params->display = false; params->display = false;
if (argc > 7){ if (argc > 6){
for (int index=7; index<argc; ++index){ for (int index=6; index<argc; ++index){
if (!strcmp(argv[index], "--verbose") || !strcmp(argv[index], "-v")){ if (!strcmp(argv[index], "--verbose") || !strcmp(argv[index], "-v")){
params->verbose = true; params->verbose = true;
} else if (!strcmp(argv[index], "--output") || !strcmp(argv[index], "-o")){ } else if (!strcmp(argv[index], "--output") || !strcmp(argv[index], "-o")){
@ -45,24 +43,25 @@ void get_args(int argc, char **argv, parameters *params){
} }
} }
/*printf("DEVIATION = %d\n" if (params->verbose){
printf("DEVIATION = %d\n"
"epsilon = %f\n" "epsilon = %f\n"
"NUMBER_OF_POINTS = %d\n" "NUMBER_OF_POINTS = %d\n"
"DIMENSIONS = %d\n" "DIMENSIONS = %d\n"
"POINTS_FILENAME = %s\n" "POINTS_FILENAME = %s\n"
"LABELS_FILENAME = %s\n"
"verbose = %d\n" "verbose = %d\n"
"display = %d\n", DEVIATION, params->epsilon, NUMBER_OF_POINTS, DIMENSIONS, POINTS_FILENAME "display = %d\n\n", DEVIATION, params->epsilon, NUMBER_OF_POINTS, DIMENSIONS, POINTS_FILENAME
, LABELS_FILENAME, params->verbose, params->display);*/ , params->verbose, params->display);
}
} }
void init(double ***vectors, char **labels){ void init(double ***vectors){
int bytes_read = 0; int bytes_read = 0;
set_GPU(); set_GPU();
if (params.verbose){ if (params.verbose){
printf("Reading dataset and labels...\n"); printf("Reading dataset...\n");
} }
// initializes vectors // initializes vectors
@ -90,34 +89,6 @@ void init(double ***vectors, char **labels){
} }
fclose(points_file); fclose(points_file);
// initializes file that will contain the labels (train)
FILE *labels_file;
labels_file = fopen(LABELS_FILENAME, "rb");
if (labels_file != NULL){
// NOTE : Labels were classified as <class 'numpy.uint8'>
// variables of type uint8 are stored as 1-byte (8-bit) unsigned integers
// gets number of labels
fseek(labels_file, 0L, SEEK_END);
long int pos = ftell(labels_file);
rewind(labels_file);
int label_elements = pos/ sizeof(char);
// allocates memory for the array
*labels = (char*)malloc(label_elements* sizeof(char));
fseek(labels_file, 0L, SEEK_SET);
bytes_read = fread((*labels), sizeof(char), label_elements, labels_file);
if ( bytes_read != label_elements ){
if(feof(points_file)){
printf("Premature end of file reached.\n");
} else{
printf("Error reading points file.");
}
fclose(labels_file);
exit(EXIT_FAILURE);
}
}
fclose(labels_file);
if (params.verbose){ if (params.verbose){
printf("Done.\n\n"); printf("Done.\n\n");
} }

8
mean_shift_cuda/meanshift_utils.h

@ -1,5 +1,5 @@
#ifndef SERIAL_UTILS_H /* Include guard */ #ifndef MEANSHIFT_UTILS_H /* Include guard */
#define SERIAL_UTILS_H #define MEANSHIFT_UTILS_H
#include <stdbool.h> #include <stdbool.h>
@ -17,7 +17,7 @@ void get_args(int argc, char **argv, Parameters *params);
//Function init sets up the GPU for later use, gets its properties and reads the dataset and label //Function init sets up the GPU for later use, gets its properties and reads the dataset and label
//arrays from the corresponding files //arrays from the corresponding files
void init(double ***vectors, char **labels); void init(double ***vectors);
//Function alloc_double allocates rows*cols bytes of continuous memory //Function alloc_double allocates rows*cols bytes of continuous memory
double **alloc_double(int rows, int cols); double **alloc_double(int rows, int cols);
@ -32,4 +32,4 @@ void print_matrix(double **array, int rows, int cols);
//If a file already exists new lines are concatenated //If a file already exists new lines are concatenated
void save_matrix(double **matrix, int iteration); void save_matrix(double **matrix, int iteration);
#endif //SERIAL_UTILS_H #endif //MEANSHIFT_UTILS_H

152
mean_shift_cuda/s4_cuda.txt

@ -1,152 +0,0 @@
Device chosen is "GeForce GTX 1070"
Device has 15 multi processors and compute capability 6.1
Max threads per block supported are 1024
Reading dataset and labels...
Done.
Device memory allocation wall clock time = 0.095415
calculate_kernel_matrix_kernel called with:
dimBlock.x = 32, dimBlock.y = 32
dimGrid.x = 157, dimGrid.y = 157
calculate_denominator called with:
dimBlock.x = 1024, dimBlock.y = 1
dimGrid.x = 5, dimGrid.y = 1
shift_points_kernel called with:
dimBlock.x = 512, dimBlock.y = 2
dimGrid.x = 10, dimGrid.y = 1
Recursion n. 0, error 900676.182157
Recursion n. 1, error 711281.993249
Recursion n. 2, error 572480.606235
Recursion n. 3, error 472793.915972
Recursion n. 4, error 391822.446089
Recursion n. 5, error 329853.726107
Recursion n. 6, error 284520.120142
Recursion n. 7, error 251741.146886
Recursion n. 8, error 221447.955499
Recursion n. 9, error 189542.833610
Recursion n. 10, error 165562.480033
Recursion n. 11, error 149331.022081
Recursion n. 12, error 134648.227158
Recursion n. 13, error 118810.681796
Recursion n. 14, error 103439.665629
Recursion n. 15, error 89152.026717
Recursion n. 16, error 79858.091608
Recursion n. 17, error 70399.328398
Recursion n. 18, error 60903.306505
Recursion n. 19, error 53256.394415
Recursion n. 20, error 46630.309716
Recursion n. 21, error 43169.000362
Recursion n. 22, error 40977.848445
Recursion n. 23, error 38912.881052
Recursion n. 24, error 37470.652381
Recursion n. 25, error 36952.180489
Recursion n. 26, error 36917.933786
Recursion n. 27, error 34428.561476
Recursion n. 28, error 33480.690673
Recursion n. 29, error 35518.955985
Recursion n. 30, error 33061.203927
Recursion n. 31, error 30148.792678
Recursion n. 32, error 28417.020165
Recursion n. 33, error 27655.498686
Recursion n. 34, error 27165.134668
Recursion n. 35, error 25027.198634
Recursion n. 36, error 20710.467322
Recursion n. 37, error 16930.822611
Recursion n. 38, error 16120.686909
Recursion n. 39, error 16815.717356
Recursion n. 40, error 17097.038444
Recursion n. 41, error 16726.706892
Recursion n. 42, error 16099.987853
Recursion n. 43, error 15356.230564
Recursion n. 44, error 14257.919881
Recursion n. 45, error 12730.999492
Recursion n. 46, error 11480.510729
Recursion n. 47, error 11125.428562
Recursion n. 48, error 11493.527715
Recursion n. 49, error 11795.776068
Recursion n. 50, error 11332.444467
Recursion n. 51, error 10075.346757
Recursion n. 52, error 8750.888719
Recursion n. 53, error 8311.623585
Recursion n. 54, error 9213.083171
Recursion n. 55, error 11262.777008
Recursion n. 56, error 13823.076207
Recursion n. 57, error 15865.328882
Recursion n. 58, error 16893.341640
Recursion n. 59, error 17352.672496
Recursion n. 60, error 17469.150241
Recursion n. 61, error 17222.122823
Recursion n. 62, error 16657.420627
Recursion n. 63, error 16203.759701
Recursion n. 64, error 16290.413766
Recursion n. 65, error 16287.849302
Recursion n. 66, error 15262.639817
Recursion n. 67, error 13189.079107
Recursion n. 68, error 10955.692696
Recursion n. 69, error 9095.181443
Recursion n. 70, error 7435.724905
Recursion n. 71, error 6139.139260
Recursion n. 72, error 5719.511494
Recursion n. 73, error 6350.790204
Recursion n. 74, error 7711.865247
Recursion n. 75, error 9187.147186
Recursion n. 76, error 9804.291233
Recursion n. 77, error 8781.079129
Recursion n. 78, error 6678.437559
Recursion n. 79, error 4871.649916
Recursion n. 80, error 4054.470133
Recursion n. 81, error 4014.509541
Recursion n. 82, error 4288.305311
Recursion n. 83, error 4604.405003
Recursion n. 84, error 4865.272719
Recursion n. 85, error 5073.005351
Recursion n. 86, error 5297.270792
Recursion n. 87, error 5655.494055
Recursion n. 88, error 6302.147393
Recursion n. 89, error 7429.778693
Recursion n. 90, error 9243.651896
Recursion n. 91, error 11741.122110
Recursion n. 92, error 13992.110851
Recursion n. 93, error 13780.915149
Recursion n. 94, error 10448.857685
Recursion n. 95, error 6504.263173
Recursion n. 96, error 3702.165546
Recursion n. 97, error 2046.597817
Recursion n. 98, error 1122.433200
Recursion n. 99, error 614.487187
Recursion n. 100, error 336.339454
Recursion n. 101, error 184.119650
Recursion n. 102, error 100.806778
Recursion n. 103, error 55.198920
Recursion n. 104, error 30.227808
Recursion n. 105, error 16.554127
Recursion n. 106, error 9.066123
Recursion n. 107, error 4.965323
Recursion n. 108, error 2.719447
Recursion n. 109, error 1.489425
Recursion n. 110, error 0.815755
Recursion n. 111, error 0.446790
Recursion n. 112, error 0.244708
Recursion n. 113, error 0.134028
Recursion n. 114, error 0.073408
Recursion n. 115, error 0.040206
Recursion n. 116, error 0.022021
Recursion n. 117, error 0.012061
Recursion n. 118, error 0.006606
Recursion n. 119, error 0.003618
Recursion n. 120, error 0.001982
Recursion n. 121, error 0.001085
Recursion n. 122, error 0.000594
Recursion n. 123, error 0.000326
Recursion n. 124, error 0.000178
Recursion n. 125, error 0.000098
Copying between device and host wall clock time = 4.864176
Total number of recursions = 125
Mean Shift wall clock time = 8.409867

8
mean_shift_cuda_shared_mem/meanshift.cu

@ -9,7 +9,6 @@ int DEVIATION = 20;
int NUMBER_OF_POINTS = 1024; int NUMBER_OF_POINTS = 1024;
int DIMENSIONS = 32; int DIMENSIONS = 32;
const char *POINTS_FILENAME = "../data/32"; const char *POINTS_FILENAME = "../data/32";
const char *LABELS_FILENAME = "../data/L.bin";
parameters params; parameters params;
struct timeval startwtime, endwtime; struct timeval startwtime, endwtime;
@ -18,13 +17,14 @@ double seq_time;
int main(int argc, char **argv){ int main(int argc, char **argv){
int recursions = 0; int recursions = 0;
double **vectors, **shifted_points; double **vectors, **shifted_points;
char *labels;
//inits parameters
params.epsilon = 0.0001; params.epsilon = 0.0001;
params.verbose = true; params.verbose = true;
params.display = true; params.display = true;
//get_args(argc, argv, &params); //commented out while in development
init(&vectors, &labels); get_args(argc, argv, &params);
init(&vectors);
// tic // tic
gettimeofday (&startwtime, NULL); gettimeofday (&startwtime, NULL);

49
mean_shift_cuda_shared_mem/meanshift_utils.cu

@ -10,14 +10,13 @@
#define OUTPUT_PREFIX "../output/output_" #define OUTPUT_PREFIX "../output/output_"
void get_args(int argc, char **argv, parameters *params){ void get_args(int argc, char **argv, parameters *params){
if (argc < 7) { if (argc < 6) {
printf("Usage: %s h e N D Pd Pl\nwhere:\n" printf("Usage: %s h e N D Pd Pl\nwhere:\n"
"\th is the variance\n" "\th is the variance\n"
"\te is the min distance, between two points, that is taken into account in computations\n" "\te is the min distance, between two points, that is taken into account in computations\n"
"\tN is the the number of points\n" "\tN is the the number of points\n"
"\tD is the number of dimensions of each point\n" "\tD is the number of dimensions of each point\n"
"\tPd is the path of the dataset file\n" "\tPd is the path of the dataset file\n"
"\tPl is the path of the labels file\n"
"\n\t--verbose | -v is an optional flag to enable execution information output" "\n\t--verbose | -v is an optional flag to enable execution information output"
"\n\t--output | -o is an optional flag to enable points output in each iteration", argv[0]); "\n\t--output | -o is an optional flag to enable points output in each iteration", argv[0]);
exit(1); exit(1);
@ -28,12 +27,11 @@ void get_args(int argc, char **argv, parameters *params){
NUMBER_OF_POINTS = atoi(argv[3]); NUMBER_OF_POINTS = atoi(argv[3]);
DIMENSIONS = atoi(argv[4]); DIMENSIONS = atoi(argv[4]);
POINTS_FILENAME = argv[5]; POINTS_FILENAME = argv[5];
LABELS_FILENAME = argv[6];
params->verbose = false; params->verbose = false;
params->display = false; params->display = false;
if (argc > 7){ if (argc > 6){
for (int index=7; index<argc; ++index){ for (int index=6; index<argc; ++index){
if (!strcmp(argv[index], "--verbose") || !strcmp(argv[index], "-v")){ if (!strcmp(argv[index], "--verbose") || !strcmp(argv[index], "-v")){
params->verbose = true; params->verbose = true;
} else if (!strcmp(argv[index], "--output") || !strcmp(argv[index], "-o")){ } else if (!strcmp(argv[index], "--output") || !strcmp(argv[index], "-o")){
@ -45,24 +43,25 @@ void get_args(int argc, char **argv, parameters *params){
} }
} }
/*printf("DEVIATION = %d\n" if (params->verbose){
printf("DEVIATION = %d\n"
"epsilon = %f\n" "epsilon = %f\n"
"NUMBER_OF_POINTS = %d\n" "NUMBER_OF_POINTS = %d\n"
"DIMENSIONS = %d\n" "DIMENSIONS = %d\n"
"POINTS_FILENAME = %s\n" "POINTS_FILENAME = %s\n"
"LABELS_FILENAME = %s\n"
"verbose = %d\n" "verbose = %d\n"
"display = %d\n", DEVIATION, params->epsilon, NUMBER_OF_POINTS, DIMENSIONS, POINTS_FILENAME "display = %d\n\n", DEVIATION, params->epsilon, NUMBER_OF_POINTS, DIMENSIONS, POINTS_FILENAME
, LABELS_FILENAME, params->verbose, params->display);*/ , params->verbose, params->display);
}
} }
void init(double ***vectors, char **labels){ void init(double ***vectors){
int bytes_read = 0; int bytes_read = 0;
set_GPU(); set_GPU();
if (params.verbose){ if (params.verbose){
printf("Reading dataset and labels...\n"); printf("Reading dataset...\n");
} }
// initializes vectors // initializes vectors
@ -90,34 +89,6 @@ void init(double ***vectors, char **labels){
} }
fclose(points_file); fclose(points_file);
// initializes file that will contain the labels (train)
FILE *labels_file;
labels_file = fopen(LABELS_FILENAME, "rb");
if (labels_file != NULL){
// NOTE : Labels were classified as <class 'numpy.uint8'>
// variables of type uint8 are stored as 1-byte (8-bit) unsigned integers
// gets number of labels
fseek(labels_file, 0L, SEEK_END);
long int pos = ftell(labels_file);
rewind(labels_file);
int label_elements = pos/ sizeof(char);
// allocates memory for the array
*labels = (char*)malloc(label_elements* sizeof(char));
fseek(labels_file, 0L, SEEK_SET);
bytes_read = fread((*labels), sizeof(char), label_elements, labels_file);
if ( bytes_read != label_elements ){
if(feof(points_file)){
printf("Premature end of file reached.\n");
} else{
printf("Error reading points file.");
}
fclose(labels_file);
exit(EXIT_FAILURE);
}
}
fclose(labels_file);
if (params.verbose){ if (params.verbose){
printf("Done.\n\n"); printf("Done.\n\n");
} }

2
mean_shift_cuda_shared_mem/meanshift_utils.h

@ -17,7 +17,7 @@ void get_args(int argc, char **argv, Parameters *params);
//Function init sets up the GPU for later use, gets its properties and reads the dataset and label //Function init sets up the GPU for later use, gets its properties and reads the dataset and label
//arrays from the corresponding files //arrays from the corresponding files
void init(double ***vectors, char **labels); void init(double ***vectors);
//Function alloc_double allocates rows*cols bytes of continuous memory //Function alloc_double allocates rows*cols bytes of continuous memory
double **alloc_double(int rows, int cols); double **alloc_double(int rows, int cols);

152
mean_shift_cuda_shared_mem/s4_shared.txt

@ -1,152 +0,0 @@
Device chosen is "GeForce GTX 1070"
Device has 15 multi processors and compute capability 6.1
Max threads per block supported are 1024
Reading dataset and labels...
Done.
Device memory allocation wall clock time = 0.000885
calculate_kernel_matrix_kernel called with:
dimBlock.x = 32, dimBlock.y = 32
dimGrid.x = 157, dimGrid.y = 157
calculate_denominator called with:
dimBlock.x = 1024, dimBlock.y = 1
dimGrid.x = 5, dimGrid.y = 1
shift_points_kernel called with:
dimBlock.x = 240, dimBlock.y = 2
dimGrid.x = 21, dimGrid.y = 1
Recursion n. 0, error 900676.182157
Recursion n. 1, error 711281.993249
Recursion n. 2, error 572480.606235
Recursion n. 3, error 472793.915972
Recursion n. 4, error 391822.446089
Recursion n. 5, error 329853.726107
Recursion n. 6, error 284520.120142
Recursion n. 7, error 251741.146886
Recursion n. 8, error 221447.955499
Recursion n. 9, error 189542.833610
Recursion n. 10, error 165562.480033
Recursion n. 11, error 149331.022081
Recursion n. 12, error 134648.227158
Recursion n. 13, error 118810.681796
Recursion n. 14, error 103439.665629
Recursion n. 15, error 89152.026717
Recursion n. 16, error 79858.091608
Recursion n. 17, error 70399.328398
Recursion n. 18, error 60903.306505
Recursion n. 19, error 53256.394415
Recursion n. 20, error 46630.309716
Recursion n. 21, error 43169.000362
Recursion n. 22, error 40977.848445
Recursion n. 23, error 38912.881052
Recursion n. 24, error 37470.652381
Recursion n. 25, error 36952.180489
Recursion n. 26, error 36917.933786
Recursion n. 27, error 34428.561476
Recursion n. 28, error 33480.690673
Recursion n. 29, error 35518.955985
Recursion n. 30, error 33061.203927
Recursion n. 31, error 30148.792678
Recursion n. 32, error 28417.020165
Recursion n. 33, error 27655.498686
Recursion n. 34, error 27165.134668
Recursion n. 35, error 25027.198634
Recursion n. 36, error 20710.467322
Recursion n. 37, error 16930.822611
Recursion n. 38, error 16120.686909
Recursion n. 39, error 16815.717356
Recursion n. 40, error 17097.038444
Recursion n. 41, error 16726.706892
Recursion n. 42, error 16099.987853
Recursion n. 43, error 15356.230564
Recursion n. 44, error 14257.919881
Recursion n. 45, error 12730.999492
Recursion n. 46, error 11480.510729
Recursion n. 47, error 11125.428562
Recursion n. 48, error 11493.527715
Recursion n. 49, error 11795.776068
Recursion n. 50, error 11332.444467
Recursion n. 51, error 10075.346757
Recursion n. 52, error 8750.888719
Recursion n. 53, error 8311.623585
Recursion n. 54, error 9213.083171
Recursion n. 55, error 11262.777008
Recursion n. 56, error 13823.076207
Recursion n. 57, error 15865.328882
Recursion n. 58, error 16893.341640
Recursion n. 59, error 17352.672496
Recursion n. 60, error 17469.150241
Recursion n. 61, error 17222.122823
Recursion n. 62, error 16657.420627
Recursion n. 63, error 16203.759701
Recursion n. 64, error 16290.413766
Recursion n. 65, error 16287.849302
Recursion n. 66, error 15262.639817
Recursion n. 67, error 13189.079107
Recursion n. 68, error 10955.692696
Recursion n. 69, error 9095.181443
Recursion n. 70, error 7435.724905
Recursion n. 71, error 6139.139260
Recursion n. 72, error 5719.511494
Recursion n. 73, error 6350.790204
Recursion n. 74, error 7711.865247
Recursion n. 75, error 9187.147186
Recursion n. 76, error 9804.291233
Recursion n. 77, error 8781.079129
Recursion n. 78, error 6678.437559
Recursion n. 79, error 4871.649916
Recursion n. 80, error 4054.470133
Recursion n. 81, error 4014.509541
Recursion n. 82, error 4288.305311
Recursion n. 83, error 4604.405003
Recursion n. 84, error 4865.272719
Recursion n. 85, error 5073.005351
Recursion n. 86, error 5297.270792
Recursion n. 87, error 5655.494055
Recursion n. 88, error 6302.147393
Recursion n. 89, error 7429.778693
Recursion n. 90, error 9243.651896
Recursion n. 91, error 11741.122110
Recursion n. 92, error 13992.110851
Recursion n. 93, error 13780.915149
Recursion n. 94, error 10448.857685
Recursion n. 95, error 6504.263173
Recursion n. 96, error 3702.165546
Recursion n. 97, error 2046.597817
Recursion n. 98, error 1122.433200
Recursion n. 99, error 614.487187
Recursion n. 100, error 336.339454
Recursion n. 101, error 184.119650
Recursion n. 102, error 100.806778
Recursion n. 103, error 55.198920
Recursion n. 104, error 30.227808
Recursion n. 105, error 16.554127
Recursion n. 106, error 9.066123
Recursion n. 107, error 4.965323
Recursion n. 108, error 2.719447
Recursion n. 109, error 1.489425
Recursion n. 110, error 0.815755
Recursion n. 111, error 0.446790
Recursion n. 112, error 0.244708
Recursion n. 113, error 0.134028
Recursion n. 114, error 0.073408
Recursion n. 115, error 0.040206
Recursion n. 116, error 0.022021
Recursion n. 117, error 0.012061
Recursion n. 118, error 0.006606
Recursion n. 119, error 0.003618
Recursion n. 120, error 0.001982
Recursion n. 121, error 0.001085
Recursion n. 122, error 0.000594
Recursion n. 123, error 0.000326
Recursion n. 124, error 0.000178
Recursion n. 125, error 0.000098
Copying between device and host wall clock time = 4.867401
Total number of recursions = 125
Mean Shift wall clock time = 8.303131

26
mean_shift_serial/32_serial.txt

@ -1,26 +0,0 @@
Iteration n. 0, error 212.611066
Iteration n. 1, error 51.768217
Iteration n. 2, error 18.321997
Iteration n. 3, error 7.902559
Iteration n. 4, error 3.830385
Iteration n. 5, error 1.990884
Iteration n. 6, error 1.077207
Iteration n. 7, error 0.596253
Iteration n. 8, error 0.334476
Iteration n. 9, error 0.189225
Iteration n. 10, error 0.107681
Iteration n. 11, error 0.061545
Iteration n. 12, error 0.035299
Iteration n. 13, error 0.020304
Iteration n. 14, error 0.011708
Iteration n. 15, error 0.006766
Iteration n. 16, error 0.003918
Iteration n. 17, error 0.002273
Iteration n. 18, error 0.001321
Iteration n. 19, error 0.000769
Iteration n. 20, error 0.000448
Iteration n. 21, error 0.000262
Iteration n. 22, error 0.000153
Iteration n. 23, error 0.000090
Total iterations = 23
Mean Shift wall clock time = 14.111556

131
mean_shift_serial/s4_serial.txt

@ -1,131 +0,0 @@
Iteration n. 0, error 900725.214707
Iteration n. 1, error 711240.249913
Iteration n. 2, error 572364.562445
Iteration n. 3, error 472414.550249
Iteration n. 4, error 391267.517200
Iteration n. 5, error 329290.004875
Iteration n. 6, error 283810.031420
Iteration n. 7, error 250529.098667
Iteration n. 8, error 219842.868199
Iteration n. 9, error 188154.106405
Iteration n. 10, error 164642.483534
Iteration n. 11, error 148533.171361
Iteration n. 12, error 134252.690689
Iteration n. 13, error 119338.958288
Iteration n. 14, error 104756.200878
Iteration n. 15, error 90709.382266
Iteration n. 16, error 81069.664373
Iteration n. 17, error 70968.599390
Iteration n. 18, error 60656.836465
Iteration n. 19, error 52460.966775
Iteration n. 20, error 45983.000348
Iteration n. 21, error 42992.393727
Iteration n. 22, error 40717.963103
Iteration n. 23, error 38174.207978
Iteration n. 24, error 36772.370608
Iteration n. 25, error 37072.563890
Iteration n. 26, error 37664.840852
Iteration n. 27, error 34789.973611
Iteration n. 28, error 32166.214159
Iteration n. 29, error 32054.774268
Iteration n. 30, error 27717.389825
Iteration n. 31, error 24851.091967
Iteration n. 32, error 24807.848863
Iteration n. 33, error 25528.369785
Iteration n. 34, error 26383.576670
Iteration n. 35, error 26326.276075
Iteration n. 36, error 24556.136673
Iteration n. 37, error 22140.508053
Iteration n. 38, error 20033.050640
Iteration n. 39, error 18709.463370
Iteration n. 40, error 18639.682420
Iteration n. 41, error 18656.780620
Iteration n. 42, error 17638.228140
Iteration n. 43, error 16412.753573
Iteration n. 44, error 16080.363424
Iteration n. 45, error 16290.286526
Iteration n. 46, error 16274.663907
Iteration n. 47, error 16177.503199
Iteration n. 48, error 16532.468594
Iteration n. 49, error 17014.170134
Iteration n. 50, error 16899.170513
Iteration n. 51, error 16248.746708
Iteration n. 52, error 15377.334577
Iteration n. 53, error 13973.506596
Iteration n. 54, error 12074.498214
Iteration n. 55, error 11010.710037
Iteration n. 56, error 11572.925918
Iteration n. 57, error 12299.881789
Iteration n. 58, error 12025.032571
Iteration n. 59, error 11390.321461
Iteration n. 60, error 11187.761516
Iteration n. 61, error 11600.801706
Iteration n. 62, error 12527.393424
Iteration n. 63, error 13691.623696
Iteration n. 64, error 14579.529807
Iteration n. 65, error 14515.733207
Iteration n. 66, error 13004.099425
Iteration n. 67, error 10149.735485
Iteration n. 68, error 6863.196535
Iteration n. 69, error 4192.067925
Iteration n. 70, error 2499.299534
Iteration n. 71, error 1631.816146
Iteration n. 72, error 1324.621884
Iteration n. 73, error 1323.129986
Iteration n. 74, error 1457.545735
Iteration n. 75, error 1660.553452
Iteration n. 76, error 1915.630274
Iteration n. 77, error 2222.334136
Iteration n. 78, error 2582.723116
Iteration n. 79, error 2994.774828
Iteration n. 80, error 3447.117721
Iteration n. 81, error 3915.073406
Iteration n. 82, error 4361.912331
Iteration n. 83, error 4750.819148
Iteration n. 84, error 5067.894125
Iteration n. 85, error 5344.808554
Iteration n. 86, error 5665.620391
Iteration n. 87, error 6157.168664
Iteration n. 88, error 6979.549508
Iteration n. 89, error 8327.271663
Iteration n. 90, error 10407.541823
Iteration n. 91, error 13233.717057
Iteration n. 92, error 15939.034640
Iteration n. 93, error 16414.969366
Iteration n. 94, error 13866.130243
Iteration n. 95, error 10111.764624
Iteration n. 96, error 6629.532768
Iteration n. 97, error 4016.629625
Iteration n. 98, error 2324.253993
Iteration n. 99, error 1315.360073
Iteration n. 100, error 737.112514
Iteration n. 101, error 411.424409
Iteration n. 102, error 229.348651
Iteration n. 103, error 127.852237
Iteration n. 104, error 71.315704
Iteration n. 105, error 39.814292
Iteration n. 106, error 22.248880
Iteration n. 107, error 12.444960
Iteration n. 108, error 6.967507
Iteration n. 109, error 3.904213
Iteration n. 110, error 2.189438
Iteration n. 111, error 1.228698
Iteration n. 112, error 0.689989
Iteration n. 113, error 0.387701
Iteration n. 114, error 0.217964
Iteration n. 115, error 0.122599
Iteration n. 116, error 0.068988
Iteration n. 117, error 0.038837
Iteration n. 118, error 0.021871
Iteration n. 119, error 0.012320
Iteration n. 120, error 0.006942
Iteration n. 121, error 0.003913
Iteration n. 122, error 0.002206
Iteration n. 123, error 0.001244
Iteration n. 124, error 0.000702
Iteration n. 125, error 0.000396
Iteration n. 126, error 0.000223
Iteration n. 127, error 0.000126
Iteration n. 128, error 0.000071
Total iterations = 128
Mean Shift wall clock time = 88.984010

8
mean_shift_serial/serial.c

@ -7,19 +7,17 @@
int DEVIATION = 20; int DEVIATION = 20;
int NUMBER_OF_POINTS = 1024; int NUMBER_OF_POINTS = 1024;
int DIMENSIONS = 32; int DIMENSIONS = 32;
char* POINTS_FILENAME = "../data/32"; const char* POINTS_FILENAME = "../data/32";
char* LABELS_FILENAME = "../data/L.bin";
struct timeval startwtime, endwtime; struct timeval startwtime, endwtime;
double seq_time; double seq_time;
int main(int argc, char **argv){ int main(int argc, char **argv){
double **vectors, **shifted_points; double **vectors, **shifted_points;
char *labels;
parameters params; parameters params;
//get_args(argc, argv); commented out while in development get_args(argc, argv);
init(&vectors, &labels, &params); init(&vectors, &params);
//save_matrix(vectors, 0); //save_matrix(vectors, 0);

116
mean_shift_serial/serial.txt

@ -1,116 +0,0 @@
Iteration n. 0, error 927827.679145
Iteration n. 1, error 726816.223326
Iteration n. 2, error 581769.204949
Iteration n. 3, error 477408.630077
Iteration n. 4, error 395485.897206
Iteration n. 5, error 334651.158957
Iteration n. 6, error 292079.617208
Iteration n. 7, error 254134.878622
Iteration n. 8, error 215114.115728
Iteration n. 9, error 182607.082276
Iteration n. 10, error 156266.959549
Iteration n. 11, error 139994.419331
Iteration n. 12, error 125521.301757
Iteration n. 13, error 112218.794486
Iteration n. 14, error 98203.683241
Iteration n. 15, error 85490.183638
Iteration n. 16, error 73443.000140
Iteration n. 17, error 62609.489556
Iteration n. 18, error 59077.977003
Iteration n. 19, error 53892.807510
Iteration n. 20, error 47565.861958
Iteration n. 21, error 45535.865588
Iteration n. 22, error 44789.582377
Iteration n. 23, error 42402.349216
Iteration n. 24, error 39130.442990
Iteration n. 25, error 37194.415972
Iteration n. 26, error 35206.437543
Iteration n. 27, error 32203.737761
Iteration n. 28, error 29549.317563
Iteration n. 29, error 27893.877946
Iteration n. 30, error 27707.173303
Iteration n. 31, error 28305.702063
Iteration n. 32, error 28536.722112
Iteration n. 33, error 27381.782682
Iteration n. 34, error 24461.926511
Iteration n. 35, error 21388.206521
Iteration n. 36, error 19411.085140
Iteration n. 37, error 18062.429515
Iteration n. 38, error 16313.720166
Iteration n. 39, error 14149.621211
Iteration n. 40, error 12735.640987
Iteration n. 41, error 12904.542590
Iteration n. 42, error 14638.353297
Iteration n. 43, error 18306.190364
Iteration n. 44, error 23544.214839
Iteration n. 45, error 23553.140641
Iteration n. 46, error 18392.083676
Iteration n. 47, error 14694.879614
Iteration n. 48, error 12225.420016
Iteration n. 49, error 10739.847211
Iteration n. 50, error 10534.182216
Iteration n. 51, error 11687.348948
Iteration n. 52, error 14062.339499
Iteration n. 53, error 17369.101524
Iteration n. 54, error 20559.831905
Iteration n. 55, error 21136.519253
Iteration n. 56, error 17377.395549
Iteration n. 57, error 11721.238164
Iteration n. 58, error 7238.102387
Iteration n. 59, error 4490.416936
Iteration n. 60, error 3033.925265
Iteration n. 61, error 2436.691557
Iteration n. 62, error 2353.336915
Iteration n. 63, error 2533.497324
Iteration n. 64, error 2856.349865
Iteration n. 65, error 3287.585287
Iteration n. 66, error 3830.184434
Iteration n. 67, error 4497.835721
Iteration n. 68, error 5294.470384
Iteration n. 69, error 6181.873374
Iteration n. 70, error 7025.865491
Iteration n. 71, error 7550.297363
Iteration n. 72, error 7412.828189
Iteration n. 73, error 6485.358048
Iteration n. 74, error 5057.608511
Iteration n. 75, error 3602.756868
Iteration n. 76, error 2419.456728
Iteration n. 77, error 1570.460802
Iteration n. 78, error 1000.682242
Iteration n. 79, error 631.345226
Iteration n. 80, error 396.219332
Iteration n. 81, error 247.948883
Iteration n. 82, error 154.922317
Iteration n. 83, error 96.716125
Iteration n. 84, error 60.351123
Iteration n. 85, error 37.650098
Iteration n. 86, error 23.485083
Iteration n. 87, error 14.648430
Iteration n. 88, error 9.136461
Iteration n. 89, error 5.698500
Iteration n. 90, error 3.554205
Iteration n. 91, error 2.216796
Iteration n. 92, error 1.382645
Iteration n. 93, error 0.862377
Iteration n. 94, error 0.537881
Iteration n. 95, error 0.335487
Iteration n. 96, error 0.209251
Iteration n. 97, error 0.130515
Iteration n. 98, error 0.081405
Iteration n. 99, error 0.050775
Iteration n. 100, error 0.031670
Iteration n. 101, error 0.019754
Iteration n. 102, error 0.012321
Iteration n. 103, error 0.007685
Iteration n. 104, error 0.004794
Iteration n. 105, error 0.002991
Iteration n. 106, error 0.001866
Iteration n. 107, error 0.001165
Iteration n. 108, error 0.000727
Iteration n. 109, error 0.000455
Iteration n. 110, error 0.000285
Iteration n. 111, error 0.000179
Iteration n. 112, error 0.000114
Iteration n. 113, error 0.000073
Total iterations = 113
Mean Shift wall clock time = 216.425137

34
mean_shift_serial/serial_declarations.c

@ -9,13 +9,12 @@
#define OUTPUT_PREFIX "../output/output_" #define OUTPUT_PREFIX "../output/output_"
void get_args(int argc, char **argv){ void get_args(int argc, char **argv){
if (argc != 6) { if (argc != 5) {
printf("Usage: %s h N D Pd Pl\nwhere:\n", argv[0]); printf("Usage: %s h N D Pd Pl\nwhere:\n", argv[0]);
printf("\th is the variance\n"); printf("\th is the variance\n");
printf("\tN is the the number of points\n"); printf("\tN is the the number of points\n");
printf("\tD is the number of dimensions of each point\n"); printf("\tD is the number of dimensions of each point\n");
printf("\tPd is the path of the dataset file\n"); printf("\tPd is the path of the dataset file\n");
printf("\tPl is the path of the labels file\n");
exit(1); exit(1);
} }
@ -23,10 +22,9 @@ void get_args(int argc, char **argv){
NUMBER_OF_POINTS = atoi(argv[2]); NUMBER_OF_POINTS = atoi(argv[2]);
DIMENSIONS = atoi(argv[3]); DIMENSIONS = atoi(argv[3]);
POINTS_FILENAME = argv[4]; POINTS_FILENAME = argv[4];
LABELS_FILENAME = argv[5];
} }
void init(double ***vectors, char **labels, parameters *params){ void init(double ***vectors, parameters *params){
int bytes_read = 0; int bytes_read = 0;
// initializes vectors // initializes vectors
FILE *points_file; FILE *points_file;
@ -53,34 +51,6 @@ void init(double ***vectors, char **labels, parameters *params){
} }
fclose(points_file); fclose(points_file);
// initializes file that will contain the labels (train)
FILE *labels_file;
labels_file = fopen(LABELS_FILENAME, "rb");
if (labels_file != NULL){
// NOTE : Labels were classified as <class 'numpy.uint8'>
// variables of type uint8 are stored as 1-byte (8-bit) unsigned integers
// gets number of labels
fseek(labels_file, 0L, SEEK_END);
long int pos = ftell(labels_file);
rewind(labels_file);
int label_elements = pos/ sizeof(char);
// allocates memory for the array
*labels = (char*)malloc(label_elements* sizeof(char));
fseek(labels_file, 0L, SEEK_SET);
bytes_read = fread((*labels), sizeof(char), label_elements, labels_file);
if ( bytes_read != label_elements ){
if(feof(points_file)){
printf("Premature end of file reached.\n");
} else{
printf("Error reading points file.");
}
fclose(labels_file);
exit(EXIT_FAILURE);
}
}
fclose(labels_file);
// MEAN SHIFT OPTIONS // MEAN SHIFT OPTIONS
params->epsilon = 0.0001; params->epsilon = 0.0001;
params->verbose = false; params->verbose = false;

5
mean_shift_serial/serial_declarations.h

@ -6,8 +6,7 @@
extern int DEVIATION; extern int DEVIATION;
extern int NUMBER_OF_POINTS; extern int NUMBER_OF_POINTS;
extern int DIMENSIONS; extern int DIMENSIONS;
extern char* POINTS_FILENAME; extern const char* POINTS_FILENAME;
extern char* LABELS_FILENAME;
typedef struct parameters { typedef struct parameters {
double epsilon; double epsilon;
@ -19,7 +18,7 @@ typedef struct parameters {
void get_args(int argc, char **argv); void get_args(int argc, char **argv);
//Function init reads the dataset and label arrays from the corresponding files. //Function init reads the dataset and label arrays from the corresponding files.
void init(double ***vectors, char **labels, parameters *params); void init(double ***vectors, parameters *params);
//Function meanshift recursively shifts original points according to th //Function meanshift recursively shifts original points according to th
//mean-shift algorithm saving the result to shiftedPoints. Struct opt has user //mean-shift algorithm saving the result to shiftedPoints. Struct opt has user

42
output/visualization/visualization.pde

@ -1,42 +0,0 @@
// Index of the output file shown next: draw() loads "../output_" + frame
// and increments this counter once per rendered frame.
int frame = 1;
// Most recently created point shape; draw() reassigns it for every plotted point.
PShape frameS;
// Sketch initialisation: creates a 720x720 canvas and requests 12 frames per
// second. draw() additionally calls delay(600) on every frame.
void setup() {
size(720, 720);
frameRate(12);
}
// Multiplier applied in draw() to the mapped canvas coordinates of each point.
int scale = 1;
// Passed as both the width and the height of the ellipse drawn for each point.
float radius = 2;
// Data-space bounds that draw() maps linearly onto the 0..720 canvas range.
// NOTE(review): presumably the min/max x and y of the plotted dataset; confirm
// before reusing the sketch with other data.
float maxX = 17.124000;
float minX = 3.402000;
float maxY = 14.996000;
float minY = 3.178000;
/**
 * Renders one animation frame.
 *
 * Loads the point file for the current frame ("../output_" + frame), plots
 * every point as a small black ellipse on a white background and then
 * advances the frame counter. When the file cannot be loaded the sketch
 * pauses for 5000 ms and requests termination via exit().
 *
 * Each input line is split on ","; only the first two fields are used, as the
 * x and y coordinates. Lines with fewer than two fields, or whose fields are
 * not parseable as floats, are skipped instead of aborting the sketch with an
 * exception.
 */
void draw() {
  background(255);
  stroke(0);
  //scale(scale);
  fill(0);

  System.out.println("frame = " + frame);

  String[] lines = loadStrings("../output_" + frame);
  if (lines == null) {
    // loadStrings() yields null when the file is missing or unreadable,
    // which here marks the end of the available frames.
    delay(5000);
    exit();
  } else {
    for (int i = 0; i < lines.length; i++) {
      String[] pieces = split(lines[i], ",");
      if (pieces.length < 2) {
        // Not enough fields for an (x, y) pair: ignore the line.
        continue;
      }

      float x;
      float y;
      try {
        x = Float.parseFloat(pieces[0]);
        y = Float.parseFloat(pieces[1]);
      } catch (NumberFormatException e) {
        // Non-numeric field: ignore the line rather than crash mid-animation.
        continue;
      }

      // Map data coordinates onto the canvas; width/height equal the values
      // given to size() in setup(), so the mapping follows the canvas size.
      float mapedX = map(x, minX, maxX, 0, width);
      float mapedY = map(y, minY, maxY, 0, height);
      frameS = createShape(ELLIPSE, mapedX*scale, mapedY*scale, radius, radius);
      shape(frameS, 0, 0);
    }
  }

  frame++;
  //Uncomment to save each frame to a jpg file
  //saveFrame("out-######.jpg");
  delay(600);
}

50
stats/32/32_runtime_loceye.txt

@ -1,50 +0,0 @@
Device chosen is "GeForce GTX 1070"
Device has 15 multi processors and compute capability 6.1
Max threads per block supported are 1024
Reading dataset and labels...
Done.
Device memory allocation wall clock time = 0.094704
calculate_kernel_matrix_kernel called with:
dimBlock.x = 32, dimBlock.y = 32
dimGrid.x = 32, dimGrid.y = 32
calculate_denominator called with:
dimBlock.x = 1024, dimBlock.y = 1
dimGrid.x = 1, dimGrid.y = 1
shift_points_kernel called with:
dimBlock.x = 32, dimBlock.y = 32
dimGrid.x = 32, dimGrid.y = 1
Recursion n. 0, error 212.611066
Recursion n. 1, error 51.768217
Recursion n. 2, error 18.321997
Recursion n. 3, error 7.902559
Recursion n. 4, error 3.830385
Recursion n. 5, error 1.990884
Recursion n. 6, error 1.077207
Recursion n. 7, error 0.596253
Recursion n. 8, error 0.334476
Recursion n. 9, error 0.189225
Recursion n. 10, error 0.107681
Recursion n. 11, error 0.061545
Recursion n. 12, error 0.035299
Recursion n. 13, error 0.020304
Recursion n. 14, error 0.011708
Recursion n. 15, error 0.006766
Recursion n. 16, error 0.003918
Recursion n. 17, error 0.002273
Recursion n. 18, error 0.001321
Recursion n. 19, error 0.000769
Recursion n. 20, error 0.000448
Recursion n. 21, error 0.000262
Recursion n. 22, error 0.000153
Recursion n. 23, error 0.000090
Copying between device and host wall clock time = 0.046973
Total number of recursions = 23
Mean Shift wall clock time = 0.713939

26
stats/32/32_runtime_loceye_serial.txt

@ -1,26 +0,0 @@
Iteration n. 0, error 212.611066
Iteration n. 1, error 51.768217
Iteration n. 2, error 18.321997
Iteration n. 3, error 7.902559
Iteration n. 4, error 3.830385
Iteration n. 5, error 1.990884
Iteration n. 6, error 1.077207
Iteration n. 7, error 0.596253
Iteration n. 8, error 0.334476
Iteration n. 9, error 0.189225
Iteration n. 10, error 0.107681
Iteration n. 11, error 0.061545
Iteration n. 12, error 0.035299
Iteration n. 13, error 0.020304
Iteration n. 14, error 0.011708
Iteration n. 15, error 0.006766
Iteration n. 16, error 0.003918
Iteration n. 17, error 0.002273
Iteration n. 18, error 0.001321
Iteration n. 19, error 0.000769
Iteration n. 20, error 0.000448
Iteration n. 21, error 0.000262
Iteration n. 22, error 0.000153
Iteration n. 23, error 0.000090
Total iterations = 23
Mean Shift wall clock time = 2.091754

57
stats/s1/s1_runtime_loceye.txt

@ -1,57 +0,0 @@
Device chosen is "GeForce GTX 1070"
Device has 15 multi processors and compute capability 6.1
Max threads per block supported are 1024
Reading dataset and labels...
Done.
Device memory allocation wall clock time = 0.083752
calculate_kernel_matrix_kernel called with:
dimBlock.x = 32, dimBlock.y = 32
dimGrid.x = 157, dimGrid.y = 157
calculate_denominator called with:
dimBlock.x = 1024, dimBlock.y = 1
dimGrid.x = 5, dimGrid.y = 1
shift_points_kernel called with:
dimBlock.x = 512, dimBlock.y = 2
dimGrid.x = 10, dimGrid.y = 1
Recursion n. 0, error 1433009.094419
Recursion n. 1, error 846076.669706
Recursion n. 2, error 457323.896842
Recursion n. 3, error 232981.679496
Recursion n. 4, error 129695.421325
Recursion n. 5, error 73386.379913
Recursion n. 6, error 42859.404834
Recursion n. 7, error 34613.230704
Recursion n. 8, error 31166.226384
Recursion n. 9, error 25075.599825
Recursion n. 10, error 14788.867230
Recursion n. 11, error 6526.169908
Recursion n. 12, error 2538.871384
Recursion n. 13, error 953.135636
Recursion n. 14, error 354.381780
Recursion n. 15, error 131.434483
Recursion n. 16, error 48.740960
Recursion n. 17, error 18.090348
Recursion n. 18, error 6.723606
Recursion n. 19, error 2.503479
Recursion n. 20, error 0.934231
Recursion n. 21, error 0.349569
Recursion n. 22, error 0.131220
Recursion n. 23, error 0.049442
Recursion n. 24, error 0.018711
Recursion n. 25, error 0.007116
Recursion n. 26, error 0.002722
Recursion n. 27, error 0.001047
Recursion n. 28, error 0.000406
Recursion n. 29, error 0.000158
Recursion n. 30, error 0.000062
Copying between device and host wall clock time = 1.291885
Total number of recursions = 30
Mean Shift wall clock time = 2.356798

33
stats/s1/s1_runtime_loceye_serial.txt

@ -1,33 +0,0 @@
Iteration n. 0, error 1434684.624217
Iteration n. 1, error 846466.140776
Iteration n. 2, error 456756.809962
Iteration n. 3, error 232102.841892
Iteration n. 4, error 128970.916818
Iteration n. 5, error 73131.136038
Iteration n. 6, error 42959.583463
Iteration n. 7, error 34479.873709
Iteration n. 8, error 30952.428863
Iteration n. 9, error 24978.110418
Iteration n. 10, error 14758.750104
Iteration n. 11, error 6515.798691
Iteration n. 12, error 2534.949856
Iteration n. 13, error 951.600403
Iteration n. 14, error 353.770499
Iteration n. 15, error 131.188064
Iteration n. 16, error 48.640549
Iteration n. 17, error 18.049030
Iteration n. 18, error 6.706455
Iteration n. 19, error 2.496305
Iteration n. 20, error 0.931212
Iteration n. 21, error 0.348291
Iteration n. 22, error 0.130677
Iteration n. 23, error 0.049212
Iteration n. 24, error 0.018613
Iteration n. 25, error 0.007074
Iteration n. 26, error 0.002704
Iteration n. 27, error 0.001040
Iteration n. 28, error 0.000403
Iteration n. 29, error 0.000157
Iteration n. 30, error 0.000062
Total iterations = 30
Mean Shift wall clock time = 21.889184

57
stats/s1/s1_runtime_shared_loceye.txt

@ -1,57 +0,0 @@
Device chosen is "GeForce GTX 1070"
Device has 15 multi processors and compute capability 6.1
Max threads per block supported are 1024
Reading dataset and labels...
Done.
Device memory allocation wall clock time = 0.000946
calculate_kernel_matrix_kernel called with:
dimBlock.x = 32, dimBlock.y = 32
dimGrid.x = 157, dimGrid.y = 157
calculate_denominator called with:
dimBlock.x = 1024, dimBlock.y = 1
dimGrid.x = 5, dimGrid.y = 1
shift_points_kernel called with:
dimBlock.x = 240, dimBlock.y = 2
dimGrid.x = 21, dimGrid.y = 1
Recursion n. 0, error 1433009.094419
Recursion n. 1, error 846076.669706
Recursion n. 2, error 457323.896842
Recursion n. 3, error 232981.679496
Recursion n. 4, error 129695.421325
Recursion n. 5, error 73386.379913
Recursion n. 6, error 42859.404834
Recursion n. 7, error 34613.230704
Recursion n. 8, error 31166.226384
Recursion n. 9, error 25075.599825
Recursion n. 10, error 14788.867230
Recursion n. 11, error 6526.169908
Recursion n. 12, error 2538.871384
Recursion n. 13, error 953.135636
Recursion n. 14, error 354.381780
Recursion n. 15, error 131.434483
Recursion n. 16, error 48.740960
Recursion n. 17, error 18.090348
Recursion n. 18, error 6.723606
Recursion n. 19, error 2.503479
Recursion n. 20, error 0.934231
Recursion n. 21, error 0.349569
Recursion n. 22, error 0.131220
Recursion n. 23, error 0.049442
Recursion n. 24, error 0.018711
Recursion n. 25, error 0.007116
Recursion n. 26, error 0.002722
Recursion n. 27, error 0.001047
Recursion n. 28, error 0.000406
Recursion n. 29, error 0.000158
Recursion n. 30, error 0.000062
Copying between device and host wall clock time = 1.294858
Total number of recursions = 30
Mean Shift wall clock time = 2.270383

194
stats/s4/s4_runtime_loceye.txt

@ -1,194 +0,0 @@
Device chosen is "GeForce GTX 1070"
Device has 15 multi processors and compute capability 6.1
Max threads per block supported are 1024
Reading dataset and labels...
Done.
Device memory allocation wall clock time = 0.097832
calculate_kernel_matrix_kernel called with:
dimBlock.x = 32, dimBlock.y = 32
dimGrid.x = 157, dimGrid.y = 157
calculate_denominator called with:
dimBlock.x = 1024, dimBlock.y = 1
dimGrid.x = 5, dimGrid.y = 1
shift_points_kernel called with:
dimBlock.x = 512, dimBlock.y = 2
dimGrid.x = 10, dimGrid.y = 1
Recursion n. 0, error 927692.420199
Recursion n. 1, error 726832.071041
Recursion n. 2, error 581943.008045
Recursion n. 3, error 477910.173261
Recursion n. 4, error 396205.409103
Recursion n. 5, error 335504.131558
Recursion n. 6, error 293282.465763
Recursion n. 7, error 255931.074369
Recursion n. 8, error 217176.502908
Recursion n. 9, error 184225.597806
Recursion n. 10, error 156900.657670
Recursion n. 11, error 139244.876747
Recursion n. 12, error 123863.788594
Recursion n. 13, error 110606.661038
Recursion n. 14, error 97241.407806
Recursion n. 15, error 85097.097975
Recursion n. 16, error 72834.204110
Recursion n. 17, error 61189.351790
Recursion n. 18, error 57114.776420
Recursion n. 19, error 52113.903356
Recursion n. 20, error 46683.503554
Recursion n. 21, error 45627.257398
Recursion n. 22, error 45462.962391
Recursion n. 23, error 43617.801926
Recursion n. 24, error 40957.621436
Recursion n. 25, error 39169.454275
Recursion n. 26, error 36642.554737
Recursion n. 27, error 33234.170852
Recursion n. 28, error 31251.037548
Recursion n. 29, error 30550.469179
Recursion n. 30, error 30200.632861
Recursion n. 31, error 30105.126757
Recursion n. 32, error 29497.004654
Recursion n. 33, error 26733.326716
Recursion n. 34, error 21718.883294
Recursion n. 35, error 16688.390032
Recursion n. 36, error 13392.435100
Recursion n. 37, error 12081.463254
Recursion n. 38, error 12013.260151
Recursion n. 39, error 12125.640867
Recursion n. 40, error 11979.901812
Recursion n. 41, error 11861.625809
Recursion n. 42, error 12699.745511
Recursion n. 43, error 15836.123874
Recursion n. 44, error 21830.150525
Recursion n. 45, error 25973.448245
Recursion n. 46, error 23114.136003
Recursion n. 47, error 19656.849824
Recursion n. 48, error 16376.259816
Recursion n. 49, error 12821.108251
Recursion n. 50, error 10245.687625
Recursion n. 51, error 9512.017920
Recursion n. 52, error 10503.986327
Recursion n. 53, error 12893.633245
Recursion n. 54, error 16395.473470
Recursion n. 55, error 19662.055425
Recursion n. 56, error 19394.169985
Recursion n. 57, error 14735.790724
Recursion n. 58, error 9736.876327
Recursion n. 59, error 6673.528841
Recursion n. 60, error 5378.600020
Recursion n. 61, error 5284.264364
Recursion n. 62, error 5872.926699
Recursion n. 63, error 6832.238864
Recursion n. 64, error 7984.739309
Recursion n. 65, error 9126.007027
Recursion n. 66, error 9953.932568
Recursion n. 67, error 10204.319105
Recursion n. 68, error 9864.246602
Recursion n. 69, error 9020.797079
Recursion n. 70, error 7649.327959
Recursion n. 71, error 5901.336946
Recursion n. 72, error 4179.350770
Recursion n. 73, error 2789.661686
Recursion n. 74, error 1798.661942
Recursion n. 75, error 1138.260267
Recursion n. 76, error 713.324040
Recursion n. 77, error 444.743371
Recursion n. 78, error 276.540458
Recursion n. 79, error 171.704910
Recursion n. 80, error 106.530024
Recursion n. 81, error 66.066664
Recursion n. 82, error 40.963588
Recursion n. 83, error 25.395950
Recursion n. 84, error 15.743686
Recursion n. 85, error 9.759711
Recursion n. 86, error 6.050105
Recursion n. 87, error 3.750486
Recursion n. 88, error 2.324944
Recursion n. 89, error 1.441247
Recursion n. 90, error 0.893441
Recursion n. 91, error 0.553852
Recursion n. 92, error 0.343339
Recursion n. 93, error 0.212840
Recursion n. 94, error 0.131942
Recursion n. 95, error 0.081793
Recursion n. 96, error 0.050705
Recursion n. 97, error 0.031433
Recursion n. 98, error 0.019487
Recursion n. 99, error 0.012081
Recursion n. 100, error 0.007490
Recursion n. 101, error 0.004645
Recursion n. 102, error 0.002881
Recursion n. 103, error 0.001788
Recursion n. 104, error 0.001110
Recursion n. 105, error 0.000691
Recursion n. 106, error 0.000431
Recursion n. 107, error 0.000271
Recursion n. 108, error 0.000172
Recursion n. 109, error 0.000112
Recursion n. 110, error 0.000075
Copying between device and host wall clock time = 4.530233
Total number of recursions = 110
Mean Shift wall clock time = 7.757764
Device chosen is "GeForce GTX 1070"
Device has 15 multi processors and compute capability 6.1
Max threads per block supported are 1024
Reading dataset and labels...
Done.
Device memory allocation wall clock time = 0.101830
calculate_kernel_matrix_kernel called with:
dimBlock.x = 32, dimBlock.y = 32
dimGrid.x = 157, dimGrid.y = 157
calculate_denominator called with:
dimBlock.x = 1024, dimBlock.y = 1
dimGrid.x = 5, dimGrid.y = 1
shift_points_kernel called with:
dimBlock.x = 512, dimBlock.y = 2
dimGrid.x = 10, dimGrid.y = 1
Recursion n. 0, error 1433009.094419
Recursion n. 1, error 846076.669706
Recursion n. 2, error 457323.896842
Recursion n. 3, error 232981.679496
Recursion n. 4, error 129695.421325
Recursion n. 5, error 73386.379913
Recursion n. 6, error 42859.404834
Recursion n. 7, error 34613.230704
Recursion n. 8, error 31166.226384
Recursion n. 9, error 25075.599825
Recursion n. 10, error 14788.867230
Recursion n. 11, error 6526.169908
Recursion n. 12, error 2538.871384
Recursion n. 13, error 953.135636
Recursion n. 14, error 354.381780
Recursion n. 15, error 131.434483
Recursion n. 16, error 48.740960
Recursion n. 17, error 18.090348
Recursion n. 18, error 6.723606
Recursion n. 19, error 2.503479
Recursion n. 20, error 0.934231
Recursion n. 21, error 0.349569
Recursion n. 22, error 0.131220
Recursion n. 23, error 0.049442
Recursion n. 24, error 0.018711
Recursion n. 25, error 0.007116
Recursion n. 26, error 0.002722
Recursion n. 27, error 0.001047
Recursion n. 28, error 0.000406
Recursion n. 29, error 0.000158
Recursion n. 30, error 0.000062
Copying between device and host wall clock time = 1.286918
Total number of recursions = 30
Mean Shift wall clock time = 2.363209

131
stats/s4/s4_runtime_loceye_serial.txt

@ -1,131 +0,0 @@
Iteration n. 0, error 900725.214707
Iteration n. 1, error 711240.249913
Iteration n. 2, error 572364.562445
Iteration n. 3, error 472414.550249
Iteration n. 4, error 391267.517200
Iteration n. 5, error 329290.004875
Iteration n. 6, error 283810.031420
Iteration n. 7, error 250529.098667
Iteration n. 8, error 219842.868199
Iteration n. 9, error 188154.106405
Iteration n. 10, error 164642.483534
Iteration n. 11, error 148533.171361
Iteration n. 12, error 134252.690689
Iteration n. 13, error 119338.958288
Iteration n. 14, error 104756.200878
Iteration n. 15, error 90709.382266
Iteration n. 16, error 81069.664373
Iteration n. 17, error 70968.599390
Iteration n. 18, error 60656.836465
Iteration n. 19, error 52460.966775
Iteration n. 20, error 45983.000348
Iteration n. 21, error 42992.393727
Iteration n. 22, error 40717.963103
Iteration n. 23, error 38174.207978
Iteration n. 24, error 36772.370608
Iteration n. 25, error 37072.563890
Iteration n. 26, error 37664.840852
Iteration n. 27, error 34789.973611
Iteration n. 28, error 32166.214159
Iteration n. 29, error 32054.774268
Iteration n. 30, error 27717.389825
Iteration n. 31, error 24851.091967
Iteration n. 32, error 24807.848863
Iteration n. 33, error 25528.369785
Iteration n. 34, error 26383.576670
Iteration n. 35, error 26326.276075
Iteration n. 36, error 24556.136673
Iteration n. 37, error 22140.508053
Iteration n. 38, error 20033.050640
Iteration n. 39, error 18709.463370
Iteration n. 40, error 18639.682420
Iteration n. 41, error 18656.780620
Iteration n. 42, error 17638.228140
Iteration n. 43, error 16412.753573
Iteration n. 44, error 16080.363424
Iteration n. 45, error 16290.286526
Iteration n. 46, error 16274.663907
Iteration n. 47, error 16177.503199
Iteration n. 48, error 16532.468594
Iteration n. 49, error 17014.170134
Iteration n. 50, error 16899.170513
Iteration n. 51, error 16248.746708
Iteration n. 52, error 15377.334577
Iteration n. 53, error 13973.506596
Iteration n. 54, error 12074.498214
Iteration n. 55, error 11010.710037
Iteration n. 56, error 11572.925918
Iteration n. 57, error 12299.881789
Iteration n. 58, error 12025.032571
Iteration n. 59, error 11390.321461
Iteration n. 60, error 11187.761516
Iteration n. 61, error 11600.801706
Iteration n. 62, error 12527.393424
Iteration n. 63, error 13691.623696
Iteration n. 64, error 14579.529807
Iteration n. 65, error 14515.733207
Iteration n. 66, error 13004.099425
Iteration n. 67, error 10149.735485
Iteration n. 68, error 6863.196535
Iteration n. 69, error 4192.067925
Iteration n. 70, error 2499.299534
Iteration n. 71, error 1631.816146
Iteration n. 72, error 1324.621884
Iteration n. 73, error 1323.129986
Iteration n. 74, error 1457.545735
Iteration n. 75, error 1660.553452
Iteration n. 76, error 1915.630274
Iteration n. 77, error 2222.334136
Iteration n. 78, error 2582.723116
Iteration n. 79, error 2994.774828
Iteration n. 80, error 3447.117721
Iteration n. 81, error 3915.073406
Iteration n. 82, error 4361.912331
Iteration n. 83, error 4750.819148
Iteration n. 84, error 5067.894125
Iteration n. 85, error 5344.808554
Iteration n. 86, error 5665.620391
Iteration n. 87, error 6157.168664
Iteration n. 88, error 6979.549508
Iteration n. 89, error 8327.271663
Iteration n. 90, error 10407.541823
Iteration n. 91, error 13233.717057
Iteration n. 92, error 15939.034640
Iteration n. 93, error 16414.969366
Iteration n. 94, error 13866.130243
Iteration n. 95, error 10111.764624
Iteration n. 96, error 6629.532768
Iteration n. 97, error 4016.629625
Iteration n. 98, error 2324.253993
Iteration n. 99, error 1315.360073
Iteration n. 100, error 737.112514
Iteration n. 101, error 411.424409
Iteration n. 102, error 229.348651
Iteration n. 103, error 127.852237
Iteration n. 104, error 71.315704
Iteration n. 105, error 39.814292
Iteration n. 106, error 22.248880
Iteration n. 107, error 12.444960
Iteration n. 108, error 6.967507
Iteration n. 109, error 3.904213
Iteration n. 110, error 2.189438
Iteration n. 111, error 1.228698
Iteration n. 112, error 0.689989
Iteration n. 113, error 0.387701
Iteration n. 114, error 0.217964
Iteration n. 115, error 0.122599
Iteration n. 116, error 0.068988
Iteration n. 117, error 0.038837
Iteration n. 118, error 0.021871
Iteration n. 119, error 0.012320
Iteration n. 120, error 0.006942
Iteration n. 121, error 0.003913
Iteration n. 122, error 0.002206
Iteration n. 123, error 0.001244
Iteration n. 124, error 0.000702
Iteration n. 125, error 0.000396
Iteration n. 126, error 0.000223
Iteration n. 127, error 0.000126
Iteration n. 128, error 0.000071
Total iterations = 128
Mean Shift wall clock time = 89.742256

3
stats/s4/s4_runtime_loceye_shared.txt

@ -1,3 +0,0 @@
Total number of recursions = 125
Mean Shift wall clock time = 8.656517

118
stats/s4/s4_serial_diades_run.txt

@ -1,118 +0,0 @@
dataset,s4
implem,serial_c_diades
iter,error
0,927827.679145
1,726816.223326
2,581769.204949
3,477408.630077
4,395485.897206
5,334651.158957
6,292079.617208
7,254134.878622
8,215114.115728
9,182607.082276
10,156266.959549
11,139994.419331
12,125521.301757
13,112218.794486
14,98203.683241
15,85490.183638
16,73443.00014
17,62609.489556
18,59077.977003
19,53892.80751
20,47565.861958
21,45535.865588
22,44789.582377
23,42402.349216
24,39130.44299
25,37194.415972
26,35206.437543
27,32203.737761
28,29549.317563
29,27893.877946
30,27707.173303
31,28305.702063
32,28536.722112
33,27381.782682
34,24461.926511
35,21388.206521
36,19411.08514
37,18062.429515
38,16313.720166
39,14149.621211
40,12735.640987
41,12904.54259
42,14638.353297
43,18306.190364
44,23544.214839
45,23553.140641
46,18392.083676
47,14694.879614
48,12225.420016
49,10739.847211
50,10534.182216
51,11687.348948
52,14062.339499
53,17369.101524
54,20559.831905
55,21136.519253
56,17377.395549
57,11721.238164
58,7238.102387
59,4490.416936
60,3033.925265
61,2436.691557
62,2353.336915
63,2533.497324
64,2856.349865
65,3287.585287
66,3830.184434
67,4497.835721
68,5294.470384
69,6181.873374
70,7025.865491
71,7550.297363
72,7412.828189
73,6485.358048
74,5057.608511
75,3602.756868
76,2419.456728
77,1570.460802
78,1000.682242
79,631.345226
80,396.219332
81,247.948883
82,154.922317
83,96.716125
84,60.351123
85,37.650098
86,23.485083
87,14.64843
88,9.136461
89,5.6985
90,3.554205
91,2.216796
92,1.382645
93,0.862377
94,0.537881
95,0.335487
96,0.209251
97,0.130515
98,0.081405
99,0.050775
100,0.03167
101,0.019754
102,0.012321
103,0.007685
104,0.004794
105,0.002991
106,0.001866
107,0.001165
108,0.000727
109,0.000455
110,0.000285
111,0.000179
112,0.000114
113,0.000073
total_time,216.425137

118
stats/s4/s4_serial_local_run.txt

@ -1,118 +0,0 @@
Dataset,S4
Implem,Serial_C_LOCAL_RUN
Iter,Error
0,927827.679145
1,726816.223326
2,581769.204949
3,477408.630077
4,395485.897206
5,334651.158957
6,292079.617208
7,254134.878622
8,215114.115728
9,182607.082276
10,156266.959549
11,139994.419331
12,125521.301757
13,112218.794486
14,98203.683241
15,85490.183638
16,73443.00014
17,62609.489556
18,59077.977003
19,53892.80751
20,47565.861958
21,45535.865588
22,44789.582377
23,42402.349216
24,39130.44299
25,37194.415972
26,35206.437543
27,32203.737761
28,29549.317563
29,27893.877946
30,27707.173303
31,28305.702063
32,28536.722112
33,27381.782682
34,24461.926511
35,21388.206521
36,19411.08514
37,18062.429515
38,16313.720166
39,14149.621211
40,12735.640987
41,12904.54259
42,14638.353297
43,18306.190364
44,23544.214839
45,23553.140641
46,18392.083676
47,14694.879614
48,12225.420016
49,10739.847211
50,10534.182216
51,11687.348948
52,14062.339499
53,17369.101524
54,20559.831905
55,21136.519253
56,17377.395549
57,11721.238164
58,7238.102387
59,4490.416936
60,3033.925265
61,2436.691557
62,2353.336915
63,2533.497324
64,2856.349865
65,3287.585287
66,3830.184434
67,4497.835721
68,5294.470384
69,6181.873374
70,7025.865491
71,7550.297363
72,7412.828189
73,6485.358048
74,5057.608511
75,3602.756868
76,2419.456728
77,1570.460802
78,1000.682242
79,631.345226
80,396.219332
81,247.948883
82,154.922317
83,96.716125
84,60.351123
85,37.650098
86,23.485083
87,14.64843
88,9.136461
89,5.6985
90,3.554205
91,2.216796
92,1.382645
93,0.862377
94,0.537881
95,0.335487
96,0.209251
97,0.130515
98,0.081405
99,0.050775
100,0.03167
101,0.019754
102,0.012321
103,0.007685
104,0.004794
105,0.002991
106,0.001866
107,0.001165
108,0.000727
109,0.000455
110,0.000285
111,0.000179
112,0.000114
113,0.000073
Total_time,327.254941

33
stats/serial_in_Matlab_and_C.txt

@ -1,33 +0,0 @@
Dataset:,X.bin,
Implementation:,Serial_C,
Iterations,Errors,
0,7.503378,
1,2.319695,
2,0.968228,
3,0.493493,
4,0.407053,
5,0.240149,
6,0.222551,
7,0.158582,
8,0.07001,
9,0.006547,
10,0.000603,
11,0.000056,
Total time,0.07168,sec
,,
Dataset:,X.bin,
Implementation:,Matlab,
Iterations,Errors,
1,5.6,
2,1.6,
3,0.78,
4,0.41,
5,0.29,
6,0.19,
7,0.21,
8,0.16,
9,0.07,
10,0.0065,
11,0.0006,
12,0.000056,
Total time,4.3,sec

53
testers/cuda_iteration_test.cu

@ -1,53 +0,0 @@
#include <stdio.h>
#include <assert.h>
#define N 11
#define M 3
// Fills the pitched N x M device matrix so that element (row, col) holds
// (row * M + col) + (row * M + col).
//
// d_matrix: base address of the pitched allocation.
// pitch:    byte distance between the starts of two consecutive rows.
//
// Both dimensions are walked with grid-stride loops, so any launch
// configuration covers the whole matrix.
__global__ void kernel(float * d_matrix, size_t pitch) {
    const int first_row = blockIdx.y * blockDim.y + threadIdx.y;
    const int row_stride = blockDim.y * gridDim.y;
    const int first_col = blockIdx.x * blockDim.x + threadIdx.x;
    const int col_stride = blockDim.x * gridDim.x;

    for (int row = first_row; row < N; row += row_stride) {
        // Rows are pitch bytes apart, so the row address is computed in bytes.
        float *row_ptr = (float *)((char *)d_matrix + row * pitch);
        for (int col = first_col; col < M; col += col_stride) {
            const int linear = row * M + col;
            row_ptr[col] = linear + linear;
        }
    }
}
// Asserts that the first `size` elements of h and d are exactly equal and
// prints a confirmation once every element has been checked.
void verify(float *h, float *d, int size) {
    int idx = 0;
    while (idx < size) {
        assert(h[idx] == d[idx]);
        ++idx;
    }
    printf("Results match\n");
}
// Host driver for the pitched-memory test: builds the expected N x M matrix
// on the host, lets the kernel fill a pitched device matrix with the same
// values, copies the device matrix back and compares the two element-wise.
//
// Returns 0 when the comparison ran, 1 when a host allocation or a CUDA call
// failed (the failing step is reported on stderr).
int main() {
    // All locals are declared before the first goto so that no jump crosses
    // an initialisation.
    float *h_matrix = (float *) malloc(M * N * sizeof(float));
    float *dc_matrix = (float *) malloc(M * N * sizeof(float));
    float *d_matrix = NULL;
    size_t pitch = 0;
    dim3 grid(1, 1, 1);
    dim3 block(3, 3, 1);
    cudaError_t err = cudaSuccess;
    int exit_code = 1;

    if (h_matrix == NULL || dc_matrix == NULL) {
        fprintf(stderr, "host allocation failed\n");
        goto cleanup;
    }

    // Expected values, computed with the same expression the kernel uses.
    for (int j = 0; j < N; j++) {
        for (int i = 0; i < M; i++) {
            h_matrix[j * M + i] = (j * M + i) + (j * M + i);
        }
    }

    err = cudaMallocPitch(&d_matrix, &pitch, M * sizeof(float), N);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMallocPitch failed: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    kernel<<<grid, block>>>(d_matrix, pitch);
    // A kernel launch returns no status itself; query it explicitly.
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    // Source rows are `pitch` bytes apart, destination rows are tightly packed.
    err = cudaMemcpy2D(dc_matrix, M * sizeof(float), d_matrix, pitch,
                       M * sizeof(float), N, cudaMemcpyDeviceToHost);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMemcpy2D failed: %s\n", cudaGetErrorString(err));
        goto cleanup;
    }

    verify(h_matrix, dc_matrix, M * N);
    exit_code = 0;

cleanup:
    cudaFree(d_matrix);
    free(dc_matrix);
    free(h_matrix);
    return exit_code;
}

51
testers/iteration.c

@ -1,51 +0,0 @@
// Draft of the mean-shift update loop: for every point i it builds row i of
// the Gaussian kernel matrix, normalises by the row sum and derives the new
// shifted position and the mean-shift vector.
//
// number_of_iterations: how many update passes to run.
// NUMBER_OF_POINTS:     number of points (kernel matrix is square in this).
// DIMENSIONS:           number of coordinates per point.
// h:                    bandwidth; entries with distance >= h are zeroed.
//
// NOTE(review): x, y, kernel_matrix, denominator, new_shift, original_points,
// mean_shift_vector and shifted_points are not declared in this file; they
// are presumably globals of the surrounding program. Confirm before reuse.
void iteration (int number_of_iterations, int NUMBER_OF_POINTS, int DIMENSIONS, int h){
    for (int iter=0; iter < number_of_iterations; iter++){
        for (int i =0; i< NUMBER_OF_POINTS; i++){
            // Row sum of the kernel matrix. It has to restart for every point,
            // otherwise denominator[i] also contains the sums of rows 0..i-1.
            double accum =0;
            for (int j=0; j< NUMBER_OF_POINTS; j++){
                // calculate distance between vectors x, y
                double sum=0;
                double dif;
                for (int k=0; k < DIMENSIONS; k++){
                    // TODO CHANGE NAMES
                    // NOTE(review): x and y do not depend on i or j here;
                    // presumably they should be point i and point j. Confirm.
                    dif = y[k]-x[k];
                    sum += dif*dif;
                }
                double distance = sqrt(sum);
                // keep only neighbours inside the bandwidth (sparse matrix)
                if (distance < h){
                    kernel_matrix[i][j] = distance;
                }else{
                    kernel_matrix[i][j] = 0;
                }
                // apply the Gaussian kernel to the non-zero entries only
                if (kernel_matrix[i][j]!=0){
                    kernel_matrix[i][j] = kernel_matrix[i][j]*kernel_matrix[i][j];
                    double exponent = ((-1)*(kernel_matrix[i][j]))/(2*(h*h));
                    kernel_matrix[i][j] = exp(exponent);
                }
                // a zero distance was skipped above, so the diagonal gets its
                // weight added here
                if (i==j){
                    kernel_matrix[i][j] = kernel_matrix[i][j] +1;
                }
                accum = accum + kernel_matrix[i][j];
            }
            denominator[i] = accum;
            for (int j =0; j < DIMENSIONS;j++){
                new_shift[i][j]=0;
                for (int k=0; k<NUMBER_OF_POINTS; k++){
                    new_shift[i][j] += kernel_matrix[i][k] * original_points[k][j];
                }
                new_shift[i][j] = new_shift[i][j] / denominator[i];
                mean_shift_vector[i][j] = new_shift[i][j] - (*shifted_points)[i][j];
            }
        }
        // frees previously shifted points, they're now garbage
        // NOTE(review): from the second pass on, shifted_points refers to
        // new_shift, so this would free the buffer that is still being
        // written. Confirm the intended ownership.
        free((*shifted_points)[0]);
        // updates shifted points pointer to the new array address
        shifted_points = &new_shift;
    }// iteration end
}

38
testers/prefactor_code.c

@ -1,38 +0,0 @@
//
// Created by anapt on 18/1/2018.
//
// compute kernel matrix
// // apply function to non zero elements of a sparse matrix
// for (int i=0; i<ROWS; i++){
// for (int j=0; j<ROWS; j++){
// if (W[i][j] != 0){
// double pow = ((-1)*(W[i][j]))/(2*(h*h));
// W[i][j] = exp(pow);
// }
// }
// }
// // make sure diagonal elements are 1
// for (int i=0; i<ROWS; i++){
// for (int j=0; j<ROWS; j++){
// if (i==j){
// W[i][j] = W[i][j] +1;
// }
// }
// }
// // normalize vector
// // allocate memory for vector l [600 1]
// double * l = malloc(ROWS * sizeof(double));
// // calculate sum(W,2)
// // W is a 600 by 600 sparse matrix
// for (int i=0; i<ROWS; i++){
// double sum =0;
// for (int j = 0; j < ROWS; j++){
// sum = sum + W[i][j];
// }
// l[i] = sum;
// }

114
testers/test_code.c

@ -1,114 +0,0 @@
//
// Created by anapt on 17/1/2018.
//
#include<stdio.h>
#include<math.h>
#include <stdlib.h>
#include <sys/time.h>
#include <float.h>
#include <stdbool.h>
#define X "data/X.bin"
#define Y "data/X.bin"
#define COLUMNS 2
#define ROWS 6
#define N 4
// Allocates a rows x cols matrix of doubles in one contiguous data block,
// plus a table of row pointers into that block, so both m[i][j] and
// m[0][i * cols + j] address the same element.
//
// Returns the row-pointer table, or NULL when a dimension is not positive,
// the total size would overflow size_t, or an allocation fails.
// Release with free(m[0]); free(m);
double **alloc_2d_double(int rows, int cols) {
    if (rows <= 0 || cols <= 0) {
        return NULL;
    }

    const size_t n_rows = (size_t)rows;
    const size_t n_cols = (size_t)cols;
    // Reject sizes whose byte count does not fit in size_t.
    if (n_cols > (size_t)-1 / sizeof(double) / n_rows) {
        return NULL;
    }

    double *data = malloc(n_rows * n_cols * sizeof *data);
    double **array = malloc(n_rows * sizeof *array);
    if (data == NULL || array == NULL) {
        free(data);
        free(array);
        return NULL;
    }

    for (size_t i = 0; i < n_rows; i++) {
        array[i] = data + n_cols * i;
    }
    return array;
}
// Copies the leading rows x cols entries of matrix a into matrix b,
// element by element in row-major order, and returns b.
double **duplicate(double **a, double **b, int rows, int cols){
    int r = 0;
    while (r < rows){
        int c = 0;
        while (c < cols){
            b[r][c] = a[r][c];
            ++c;
        }
        ++r;
    }
    return b;
}
// TODO check why there's is a difference in the norm calculate in matlab
double norm(double ** m, int rows, int cols){
double sum=0, a=0;
for (int i = 0; i < rows; i++) {
for (int j = 0; j < cols; j++) {
a = m[i][j] * m[i][j];
sum = sum + a;
}
}
double norm = sqrt(sum);
return norm;
}
double norm2(double ** m, int rows, int cols){
double sum=0, a=0;
for (int i = 0; i < cols; i++) {
for (int j = 0; j < rows; j++) {
a = m[i][j] * m[i][j];
sum = sum + a;
}
}
double norm = sqrt(sum);
return norm;
}
// Dense matrix product: output = matrix1 * matrix2.
// matrix1 is ROWS x ROWS, matrix2 and output are ROWS x COLUMNS; the sizes
// come from the file-level ROWS and COLUMNS macros.
void multiply(double ** matrix1, double ** matrix2, double ** output){
    for (int r = 0; r < ROWS; ++r){
        for (int c = 0; c < COLUMNS; ++c){
            // accumulate the dot product of row r and column c in place
            output[r][c] = 0;
            int t = 0;
            while (t < ROWS){
                output[r][c] += matrix1[r][t] * matrix2[t][c];
                ++t;
            }
        }
    }
}
// Writes a rows x cols matrix to stdout, one matrix row per output line,
// each entry formatted with "%f" and followed by a single space.
void print_matrix(double ** array, int rows, int cols){
    for (int r = 0; r < rows; ++r){
        int c = 0;
        while (c < cols){
            printf("%f ", array[r][c]);
            ++c;
        }
        printf("\n");
    }
}
// Reads rows x cols doubles from the binary file at path into m, one matrix
// row per fread call. Returns 0 on success, -1 if the file cannot be opened
// or holds fewer values than requested (the reason is reported on stderr).
static int load_matrix(const char *path, double **m, int rows, int cols){
    FILE *f = fopen(path, "rb");
    if (f == NULL){
        perror(path);
        return -1;
    }

    int rc = 0;
    for (int i = 0; i < rows; i++){
        if (fread(m[i], sizeof(double), (size_t)cols, f) != (size_t)cols){
            fprintf(stderr, "%s: short read at row %d\n", path, i);
            rc = -1;
            break;
        }
    }
    fclose(f);
    return rc;
}

// Releases a matrix obtained from alloc_2d_double (data block, then the row
// table). Accepts NULL.
static void free_2d_double(double **m){
    if (m != NULL){
        free(m[0]);
        free(m);
    }
}

// Test driver for multiply(): loads a ROWS x ROWS matrix from X and a
// ROWS x COLUMNS matrix from Y, multiplies them and prints both inputs
// followed by the product.
//
// Returns 0 on success, EXIT_FAILURE when an allocation or a file read fails.
int main()
{
    int status = EXIT_FAILURE;
    double **vector1 = alloc_2d_double(ROWS, ROWS);
    double **vector2 = alloc_2d_double(ROWS, COLUMNS);
    double **res = alloc_2d_double(ROWS, COLUMNS);

    if (vector1 == NULL || vector2 == NULL || res == NULL){
        fprintf(stderr, "matrix allocation failed\n");
        goto cleanup;
    }
    if (load_matrix(X, vector1, ROWS, ROWS) != 0){
        goto cleanup;
    }
    if (load_matrix(Y, vector2, ROWS, COLUMNS) != 0){
        goto cleanup;
    }

    multiply(vector1, vector2, res);
    print_matrix(vector1, ROWS, ROWS);
    print_matrix(vector2, ROWS, COLUMNS);
    print_matrix(res, ROWS, COLUMNS);
    status = 0;

cleanup:
    free_2d_double(res);
    free_2d_double(vector2);
    free_2d_double(vector1);
    return status;
}
Loading…
Cancel
Save