Browse Source

Minor tweaks

master
Apostolos Fanakis 7 years ago
parent
commit
106a7f449f
  1. 16
      mean_shift_cuda/Makefile
  2. BIN
      mean_shift_cuda/meanshift
  3. 20
      mean_shift_cuda/meanshift.cu
  4. 2
      mean_shift_cuda/meanshift_kernels.cu
  5. 136
      mean_shift_cuda/meanshift_utils.cu
  6. 22
      mean_shift_cuda/meanshift_utils.h

16
mean_shift_cuda/Makefile

@ -3,13 +3,19 @@ SHELL := /bin/bash
# ============================================ # ============================================
# COMMANDS # COMMANDS
CC = /usr/local/cuda/bin/nvcc CC = nvcc
RM = rm -f
HOST_COMPILER = -ccbin gcc HOST_COMPILER = -ccbin gcc
CFLAGS= -arch=sm_21 -lm -O0 -I. -Wno-deprecated-gpu-targets CUDA_FLAGS = -arch=sm_21 -Wno-deprecated-gpu-targets
C_FLAGS = -lm -O3 -I.
COMPILE_FLAGS = $(HOST_COMPILER) -x cu $(CUDA_FLAGS) -dc $(C_FLAGS)
LINK_FLAGS = $(HOST_COMPILER) $(CUDA_FLAGS) $(C_FLAGS)
OBJ = meanshift.o meanshift_utils.o meanshift_kernels.o OBJ = meanshift.o meanshift_utils.o meanshift_kernels.o
DEPS = meanshift_utils.h meanshift_kernels.h DEPS = meanshift_utils.h meanshift_kernels.h
RM = rm -f
# ========================================== # ==========================================
# TARGETS # TARGETS
@ -23,7 +29,7 @@ all: $(EXECUTABLES)
# DEPENDENCIES (HEADERS) # DEPENDENCIES (HEADERS)
%.o: %.cu $(DEPS) %.o: %.cu $(DEPS)
$(CC) $(HOST_COMPILER) -x cu $(CFLAGS) -dc $< -o $@ $(CC) $(COMPILE_FLAGS) $< -o $@
.PRECIOUS: $(EXECUTABLES) $(OBJ) .PRECIOUS: $(EXECUTABLES) $(OBJ)
@ -31,7 +37,7 @@ all: $(EXECUTABLES)
# EXECUTABLE (MAIN) # EXECUTABLE (MAIN)
$(EXECUTABLES): $(OBJ) $(EXECUTABLES): $(OBJ)
$(CC) $(HOST_COMPILER) $(CFLAGS) $(OBJ) -o $@ $(CC) $(LINK_FLAGS) $(OBJ) -o $@
clean: clean:
$(RM) *.o *~ $(EXECUTABLES) $(RM) *.o *~ $(EXECUTABLES)

BIN
mean_shift_cuda/meanshift

Binary file not shown.

20
mean_shift_cuda/meanshift.cu

@ -9,30 +9,38 @@ int NUMBER_OF_POINTS = 600;
int DIMENSIONS = 2; int DIMENSIONS = 2;
char* POINTS_FILENAME = "../data/X.bin"; char* POINTS_FILENAME = "../data/X.bin";
char* LABELS_FILENAME = "../data/L.bin"; char* LABELS_FILENAME = "../data/L.bin";
parameters params;
struct timeval startwtime, endwtime; struct timeval startwtime, endwtime;
double seq_time; double seq_time;
int main(int argc, char **argv){ int main(int argc, char **argv){
int iterations;
double **vectors, **shifted_points; double **vectors, **shifted_points;
char *labels; char *labels;
parameters params;
//get_args(argc, argv); commented out while in development params.epsilon = 0.0001;
init(&vectors, &labels, &params); params.verbose = true;
params.display = true;
//get_args(argc, argv, &params); //commented out while in development
init(&vectors, &labels);
//save_matrix(vectors, 0); //save_matrix(vectors, 0);
// tic // tic
gettimeofday (&startwtime, NULL); gettimeofday (&startwtime, NULL);
int iterations = meanshift(vectors, &shifted_points, DEVIATION, &params); iterations = meanshift(vectors, &shifted_points, DEVIATION, &params);
printf("Total iterations = %d\n", iterations);
// toc // toc
gettimeofday (&endwtime, NULL); gettimeofday (&endwtime, NULL);
seq_time = (double)((endwtime.tv_usec - startwtime.tv_usec)/1.0e6 + endwtime.tv_sec - startwtime.tv_sec); seq_time = (double)((endwtime.tv_usec - startwtime.tv_usec)/1.0e6 + endwtime.tv_sec - startwtime.tv_sec);
printf("%s wall clock time = %f\n","Mean Shift", seq_time);
if (params.verbose){
printf("\nTotal number of iterations = %d\n", iterations);
printf("%s wall clock time = %f\n","Mean Shift", seq_time);
}
//TODO write output points to file -> plot later //TODO write output points to file -> plot later
//save_matrix(shifted_points, iterations); //save_matrix(shifted_points, iterations);

2
mean_shift_cuda/meanshift_kernels.cu

@ -5,8 +5,8 @@ __global__ void multiply_kernel(Matrix matrix1, Matrix matrix2, Matrix output){
// Each thread computes one element of output // Each thread computes one element of output
// by accumulating results into cell_value // by accumulating results into cell_value
double cell_value = 0; double cell_value = 0;
int col = blockIdx.y * blockDim.y + threadIdx.y;
int row = blockIdx.x * blockDim.x + threadIdx.x; int row = blockIdx.x * blockDim.x + threadIdx.x;
int col = blockIdx.y * blockDim.y + threadIdx.y;
if (row + col < output.height * output.width){ if (row + col < output.height * output.width){
for (int element_index = 0; element_index < matrix1.width; ++element_index){ for (int element_index = 0; element_index < matrix1.width; ++element_index){

136
mean_shift_cuda/meanshift_utils.cu

@ -8,28 +8,66 @@
#include "meanshift_kernels.h" #include "meanshift_kernels.h"
#define OUTPUT_PREFIX "../output/output_" #define OUTPUT_PREFIX "../output/output_"
#define BLOCK_SIZE 8 int BLOCK_SIZE = 16;
void get_args(int argc, char **argv){ cudaDeviceProp device_properties;
if (argc != 6) {
printf("Usage: %s h N D Pd Pl\nwhere:\n", argv[0]); void get_args(int argc, char **argv, parameters *params){
printf("\th is the variance\n"); if (argc < 7) {
printf("\tN is the the number of points\n"); printf("Usage: %s h e N D Pd Pl\nwhere:\n"
printf("\tD is the number of dimensions of each point\n"); "\th is the variance\n"
printf("\tPd is the path of the dataset file\n"); "\te is the min distance, between two points, that is taken into account in computations\n"
printf("\tPl is the path of the labels file\n"); "\tN is the the number of points\n"
"\tD is the number of dimensions of each point\n"
"\tPd is the path of the dataset file\n"
"\tPl is the path of the labels file\n"
"\n\t--verbose | -v is an optional flag to enable execution information output"
"\n\t--output | -o is an optional flag to enable points output in each iteration", argv[0]);
exit(1); exit(1);
} }
DEVIATION = atoi(argv[1]); DEVIATION = atoi(argv[1]);
NUMBER_OF_POINTS = atoi(argv[2]); params->epsilon = atof(argv[2]);
DIMENSIONS = atoi(argv[3]); NUMBER_OF_POINTS = atoi(argv[3]);
POINTS_FILENAME = argv[4]; DIMENSIONS = atoi(argv[4]);
LABELS_FILENAME = argv[5]; POINTS_FILENAME = argv[5];
LABELS_FILENAME = argv[6];
params->verbose = false;
params->display = false;
if (argc > 7){
for (int index=7; index<argc; ++index){
if (!strcmp(argv[index], "--verbose") || !strcmp(argv[index], "-v")){
params->verbose = true;
} else if (!strcmp(argv[index], "--output") || !strcmp(argv[index], "-o")){
params->display = true;
} else {
printf("Couldn't parse argument %d: %s\n", index, argv[index]);
exit(EXIT_FAILURE);
}
}
}
/*printf("DEVIATION = %d\n"
"epsilon = %f\n"
"NUMBER_OF_POINTS = %d\n"
"DIMENSIONS = %d\n"
"POINTS_FILENAME = %s\n"
"LABELS_FILENAME = %s\n"
"verbose = %d\n"
"display = %d\n", DEVIATION, params->epsilon, NUMBER_OF_POINTS, DIMENSIONS, POINTS_FILENAME
, LABELS_FILENAME, params->verbose, params->display);*/
} }
void init(double ***vectors, char **labels, parameters *params){ void init(double ***vectors, char **labels){
int bytes_read = 0; int bytes_read = 0;
set_Gpu();
if (params.verbose){
printf("Reading dataset and labels...\n");
}
// initializes vectors // initializes vectors
FILE *points_file; FILE *points_file;
points_file = fopen(POINTS_FILENAME, "rb"); points_file = fopen(POINTS_FILENAME, "rb");
@ -83,10 +121,45 @@ void init(double ***vectors, char **labels, parameters *params){
} }
fclose(labels_file); fclose(labels_file);
// MEAN SHIFT OPTIONS if (params.verbose){
params->epsilon = 0.0001; printf("Done.\n\n");
params->verbose = false; }
params->display = false; }
//Based on https://stackoverflow.com/a/28113186
//A more in-depth reference: https://www.cs.virginia.edu/~csadmin/wiki/index.php/CUDA_Support/Choosing_a_GPU
void set_Gpu(){
int devices_count = 0, max_multiprocessors = 0, max_device = 0;
// gets the number of available devices; gpuErrchk reports failures of this call,
// such as no device being present or no driver being installed
gpuErrchk( cudaGetDeviceCount(&devices_count) );
for(int device_index = 0; device_index < devices_count; ++device_index){
// gets current index device's properties
cudaDeviceProp this_device_properties;
gpuErrchk( cudaGetDeviceProperties(&this_device_properties, device_index) );
// stores best available device's index
// only devices with compute capability >= 2.0 are able to run the code
if (max_multiprocessors < this_device_properties.multiProcessorCount
&& this_device_properties.major >= 2 && this_device_properties.minor >= 0){
// stores devices properties for later use
device_properties = this_device_properties;
max_multiprocessors = this_device_properties.multiProcessorCount;
max_device = device_index;
}
}
// sets the device
gpuErrchk( cudaSetDevice(max_device) );
BLOCK_SIZE = device_properties.maxThreadsPerBlock;
if (params.verbose){
printf("Device chosen is \"%s\"\n"
"Device has %d multi processors and compute capability %d.%d\n"
"Setting BLOCK_SIZE to max threads per block supported (%d)\n\n"
, device_properties.name
, device_properties.multiProcessorCount, device_properties.major, device_properties.minor
, BLOCK_SIZE);
}
} }
int meanshift(double **original_points, double ***shifted_points, int deviation int meanshift(double **original_points, double ***shifted_points, int deviation
@ -157,11 +230,15 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
// updates shifted points pointer to the new array address // updates shifted points pointer to the new array address
shifted_points = &new_shift; shifted_points = &new_shift;
save_matrix((*shifted_points), iteration); if (params.display){
save_matrix((*shifted_points), iteration);
}
// calculates norm of the new mean shift vector // calculates norm of the new mean shift vector
double current_norm = norm(mean_shift_vector, NUMBER_OF_POINTS, DIMENSIONS); double current_norm = norm(mean_shift_vector, NUMBER_OF_POINTS, DIMENSIONS);
printf("Iteration n. %d, error %f \n", iteration, current_norm); if (params.verbose){
printf("Iteration n. %d, error %f \n", iteration, current_norm);
}
/** iterates until convergence **/ /** iterates until convergence **/
if (current_norm > opt->epsilon) { if (current_norm > opt->epsilon) {
@ -170,7 +247,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation
} }
if (iteration == 0){ if (iteration == 0){
// cleans up this iteration's allocations // cleans up allocations
free(mean_shift_vector[0]); free(mean_shift_vector[0]);
free(mean_shift_vector); free(mean_shift_vector);
free(kernel_matrix[0]); free(kernel_matrix[0]);
@ -195,6 +272,8 @@ double norm(double **matrix, int rows, int cols){
} }
void multiply(double **kernel_matrix, double **original_points, double ***new_shift){ void multiply(double **kernel_matrix, double **original_points, double ***new_shift){
static bool firstIter = true;
// allocates memory for kernel_matrix in GPU and copies the array // allocates memory for kernel_matrix in GPU and copies the array
Matrix d_kernel_matrix; Matrix d_kernel_matrix;
d_kernel_matrix.width = NUMBER_OF_POINTS; d_kernel_matrix.width = NUMBER_OF_POINTS;
@ -220,11 +299,16 @@ void multiply(double **kernel_matrix, double **original_points, double ***new_sh
size = NUMBER_OF_POINTS * DIMENSIONS * sizeof(double); size = NUMBER_OF_POINTS * DIMENSIONS * sizeof(double);
gpuErrchk( cudaMalloc(&d_new_shift.elements, size) ); gpuErrchk( cudaMalloc(&d_new_shift.elements, size) );
//dim3 dimBlock(16, 16); dim3 dimBlock((d_new_shift.height < sqrt(BLOCK_SIZE)) ? d_new_shift.height : sqrt(BLOCK_SIZE)
//dim3 dimGrid(d_original_points.width / dimBlock.x, d_kernel_matrix.height / dimBlock.y); , (d_new_shift.width < sqrt(BLOCK_SIZE)) ? d_new_shift.width : sqrt(BLOCK_SIZE));
dim3 dimGrid((d_new_shift.height + dimBlock.x - 1) / dimBlock.x
, (d_new_shift.width + dimBlock.y - 1) / dimBlock.y);
dim3 dimBlock(10, 2); if (firstIter && params.verbose){
dim3 dimGrid(60, 1); printf("dimBlock.x = %d, dimBlock.y = %d\n", dimBlock.x, dimBlock.y);
printf("dimGrid.x = %d, dimGrid.y = %d\n\n", dimGrid.x, dimGrid.y);
firstIter = false;
}
multiply_kernel<<<dimGrid, dimBlock>>>(d_kernel_matrix, d_original_points, d_new_shift); multiply_kernel<<<dimGrid, dimBlock>>>(d_kernel_matrix, d_original_points, d_new_shift);
gpuErrchk( cudaPeekAtLastError() ); gpuErrchk( cudaPeekAtLastError() );

22
mean_shift_cuda/meanshift_utils.h

@ -14,23 +14,29 @@ inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=t
} }
} }
extern int DEVIATION; /* Structs */
extern int NUMBER_OF_POINTS;
extern int DIMENSIONS;
extern char* POINTS_FILENAME;
extern char* LABELS_FILENAME;
typedef struct parameters { typedef struct parameters {
double epsilon; double epsilon;
bool verbose; bool verbose;
bool display; bool display;
} parameters; } parameters;
/* Global variables */
extern int DEVIATION;
extern int NUMBER_OF_POINTS;
extern int DIMENSIONS;
extern char* POINTS_FILENAME;
extern char* LABELS_FILENAME;
extern parameters params;
extern cudaDeviceProp device_properties;
//Function get_args parses command line arguments. //Function get_args parses command line arguments.
void get_args(int argc, char **argv); void get_args(int argc, char **argv, parameters *params);
//Function init reads the dataset and label arrays from the corresponding files. //Function init reads the dataset and label arrays from the corresponding files.
void init(double ***vectors, char **labels, parameters *params); void init(double ***vectors, char **labels);
void set_Gpu();
//Function meanshift recursively shifts original points according to the //Function meanshift recursively shifts original points according to the
//mean-shift algorithm saving the result to shiftedPoints. Struct opt has user //mean-shift algorithm saving the result to shiftedPoints. Struct opt has user

Loading…
Cancel
Save