Browse Source

Improve memory management

master
Apostolos Fanakis 7 years ago
parent
commit
a23fd2ff42
  1. 9
      mean_shift_cuda_shared_mem/meanshift.cu
  2. 49
      mean_shift_cuda_shared_mem/meanshift_gpu_utils.cu
  3. 10
      mean_shift_cuda_shared_mem/meanshift_gpu_utils.h
  4. 4
      mean_shift_cuda_shared_mem/meanshift_kernels.cu
  5. 6
      mean_shift_cuda_shared_mem/meanshift_kernels.h
  6. 6
      mean_shift_cuda_shared_mem/meanshift_utils.h

9
mean_shift_cuda_shared_mem/meanshift.cu

@ -5,10 +5,10 @@
#include "meanshift_utils.h" #include "meanshift_utils.h"
#include "meanshift_gpu_utils.h" #include "meanshift_gpu_utils.h"
int DEVIATION = 20; int DEVIATION = 1;
int NUMBER_OF_POINTS = 1024; int NUMBER_OF_POINTS = 600;
int DIMENSIONS = 32; int DIMENSIONS = 2;
const char *POINTS_FILENAME = "../data/32"; const char *POINTS_FILENAME = "../data/X.bin";
const char *LABELS_FILENAME = "../data/L.bin"; const char *LABELS_FILENAME = "../data/L.bin";
parameters params; parameters params;
@ -33,7 +33,6 @@ int main(int argc, char **argv){
// toc // toc
gettimeofday (&endwtime, NULL); gettimeofday (&endwtime, NULL);
seq_time = (double)((endwtime.tv_usec - startwtime.tv_usec)/1.0e6 + endwtime.tv_sec - startwtime.tv_sec); seq_time = (double)((endwtime.tv_usec - startwtime.tv_usec)/1.0e6 + endwtime.tv_sec - startwtime.tv_sec);
printf("\nTotal number of recursions = %d\n", recursions); printf("\nTotal number of recursions = %d\n", recursions);
printf("%s wall clock time = %f\n","Mean Shift", seq_time); printf("%s wall clock time = %f\n","Mean Shift", seq_time);

49
mean_shift_cuda_shared_mem/meanshift_gpu_utils.cu

@ -55,24 +55,26 @@ void set_GPU(){
int meanshift(double **original_points, double ***shifted_points, int deviation){ int meanshift(double **original_points, double ***shifted_points, int deviation){
// host variables // host variables
int size = 0;
static int recursion = 0; static int recursion = 0;
static double **kernel_matrix, **mean_shift_vector, w_memcpy_time; static double **kernel_matrix, **new_shift, **mean_shift_vector, w_memcpy_time;
double **new_shift, current_norm = 0, tmp_w_memcpy_time; double current_norm = 0, tmp_w_memcpy_time;
bool is_first_recursion = false; bool is_first_recursion = false;
// device variables // device variables
static Matrix d_original_points, d_shifted_points, d_kernel_matrix, d_denominator, static Matrix d_original_points, d_shifted_points, d_kernel_matrix, d_denominator,
d_mean_shift_vector; d_new_shift, d_mean_shift_vector;
Matrix d_new_shift;
// allocates memory and copies original points on first recursion // allocates memory and copies original points on first recursion
if (recursion == 0 || (*shifted_points) == NULL){ if (recursion == 0 || (*shifted_points) == NULL){
is_first_recursion = true; is_first_recursion = true;
// allocates memory for shifted points array and copies original points into it // allocates memory for shifted points array and copies original points into it
(*shifted_points) = alloc_double(NUMBER_OF_POINTS, DIMENSIONS); (*shifted_points) = alloc_double(NUMBER_OF_POINTS, DIMENSIONS);
duplicate(original_points, NUMBER_OF_POINTS, DIMENSIONS, shifted_points); duplicate(original_points, NUMBER_OF_POINTS, DIMENSIONS, shifted_points);
// allocates memory for new shift array
new_shift = alloc_double(NUMBER_OF_POINTS, DIMENSIONS);
// allocates memory for mean shift vector // allocates memory for mean shift vector
mean_shift_vector = alloc_double(NUMBER_OF_POINTS, DIMENSIONS); mean_shift_vector = alloc_double(NUMBER_OF_POINTS, DIMENSIONS);
// initializes elements of mean_shift_vector to inf // initializes elements of mean_shift_vector to inf
@ -90,7 +92,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation)
// allocates corresponding memory in device // allocates corresponding memory in device
init_device_memory(original_points, *shifted_points, &d_original_points, &d_shifted_points, init_device_memory(original_points, *shifted_points, &d_original_points, &d_shifted_points,
&d_kernel_matrix, &d_denominator, &d_mean_shift_vector); &d_kernel_matrix, &d_denominator, &d_new_shift, &d_mean_shift_vector);
// toc // toc
gettimeofday (&end_w_time, NULL); gettimeofday (&end_w_time, NULL);
seq = (double)((end_w_time.tv_usec - start_w_time.tv_usec) seq = (double)((end_w_time.tv_usec - start_w_time.tv_usec)
@ -110,27 +112,19 @@ int meanshift(double **original_points, double ***shifted_points, int deviation)
// calculates denominator // calculates denominator
calculate_denominator(d_kernel_matrix, d_denominator); calculate_denominator(d_kernel_matrix, d_denominator);
// creates new y vector
// allocates memory in every recursion
new_shift = alloc_double(NUMBER_OF_POINTS, DIMENSIONS);
// allocates corresponding memory in device
d_new_shift.width = DIMENSIONS;
d_new_shift.height = NUMBER_OF_POINTS;
d_new_shift.stride = d_new_shift.width;
size = NUMBER_OF_POINTS * DIMENSIONS * sizeof(double);
gpuErrchk( cudaMalloc(&(d_new_shift.elements), size) );
shift_points(d_kernel_matrix, d_original_points, d_shifted_points, d_new_shift, d_denominator, shift_points(d_kernel_matrix, d_original_points, d_shifted_points, d_new_shift, d_denominator,
d_mean_shift_vector, kernel_matrix, original_points, &new_shift, &mean_shift_vector, d_mean_shift_vector, kernel_matrix, original_points, &new_shift, &mean_shift_vector,
&tmp_w_memcpy_time); &tmp_w_memcpy_time);
w_memcpy_time += tmp_w_memcpy_time; w_memcpy_time += tmp_w_memcpy_time;
// frees previously shifted points, they're now garbage
free((*shifted_points)[0]);
gpuErrchk( cudaFree(d_shifted_points.elements) );
// updates shifted points pointer to the new array address // updates shifted points pointer to the new array address
double ***temp = shifted_points;
shifted_points = &new_shift; shifted_points = &new_shift;
new_shift = *temp;
double *d_temp = d_shifted_points.elements;
d_shifted_points.elements = d_new_shift.elements; d_shifted_points.elements = d_new_shift.elements;
d_new_shift.elements = d_temp;
if (params.display){ if (params.display){
save_matrix((*shifted_points), recursion); save_matrix((*shifted_points), recursion);
@ -172,8 +166,11 @@ int meanshift(double **original_points, double ***shifted_points, int deviation)
free(mean_shift_vector); free(mean_shift_vector);
free(kernel_matrix[0]); free(kernel_matrix[0]);
free(kernel_matrix); free(kernel_matrix);
//free(new_shift[0]);
//free(new_shift);
free_device_memory(d_original_points, d_kernel_matrix, d_denominator, d_shifted_points); free_device_memory(d_original_points, d_kernel_matrix, d_denominator, d_shifted_points,
d_new_shift);
} }
return recursion; return recursion;
@ -181,7 +178,7 @@ int meanshift(double **original_points, double ***shifted_points, int deviation)
void init_device_memory(double **original_points, double **shifted_points, void init_device_memory(double **original_points, double **shifted_points,
Matrix *d_original_points, Matrix *d_shifted_points, Matrix *d_kernel_matrix, Matrix *d_original_points, Matrix *d_shifted_points, Matrix *d_kernel_matrix,
Matrix *d_denominator, Matrix *d_mean_shift_vector){ Matrix *d_denominator, Matrix *d_new_shift, Matrix *d_mean_shift_vector){
int size; int size;
// allocates memory for original_points in GPU and copies the array // allocates memory for original_points in GPU and copies the array
@ -216,6 +213,13 @@ void init_device_memory(double **original_points, double **shifted_points,
size = NUMBER_OF_POINTS * sizeof(double); size = NUMBER_OF_POINTS * sizeof(double);
gpuErrchk( cudaMalloc(&(d_denominator->elements), size) ); gpuErrchk( cudaMalloc(&(d_denominator->elements), size) );
// allocates memory for new_shift in GPU
d_new_shift->width = DIMENSIONS;
d_new_shift->height = NUMBER_OF_POINTS;
d_new_shift->stride = d_new_shift->width;
size = NUMBER_OF_POINTS * DIMENSIONS * sizeof(double);
gpuErrchk( cudaMalloc(&(d_new_shift->elements), size) );
// allocates memory for mean_shift_vector in GPU // allocates memory for mean_shift_vector in GPU
d_mean_shift_vector->width = DIMENSIONS; d_mean_shift_vector->width = DIMENSIONS;
d_mean_shift_vector->height = NUMBER_OF_POINTS; d_mean_shift_vector->height = NUMBER_OF_POINTS;
@ -367,10 +371,11 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi
} }
void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator, void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator,
Matrix d_shifted_points){ Matrix d_shifted_points, Matrix d_new_shift){
// frees all memory previously allocated in device // frees all memory previously allocated in device
gpuErrchk( cudaFree(d_original_points.elements) ); gpuErrchk( cudaFree(d_original_points.elements) );
gpuErrchk( cudaFree(d_kernel_matrix.elements) ); gpuErrchk( cudaFree(d_kernel_matrix.elements) );
gpuErrchk( cudaFree(d_denominator.elements) ); gpuErrchk( cudaFree(d_denominator.elements) );
gpuErrchk( cudaFree(d_shifted_points.elements) ); gpuErrchk( cudaFree(d_shifted_points.elements) );
gpuErrchk( cudaFree(d_new_shift.elements) );
} }

10
mean_shift_cuda_shared_mem/meanshift_gpu_utils.h

@ -1,5 +1,5 @@
#ifndef SERIAL_GPU_UTILS_H /* Include guard */ #ifndef MEANSHIFT_GPU_UTILS_H /* Include guard */
#define SERIAL_GPU_UTILS_H #define MEANSHIFT_GPU_UTILS_H
#include "meanshift_kernels.h" #include "meanshift_kernels.h"
@ -33,7 +33,7 @@ int meanshift(double **original_points, double ***shifted_points, int h);
//Function init_device_memory allocates memory for necessary arrays in the device //Function init_device_memory allocates memory for necessary arrays in the device
void init_device_memory(double **original_points, double **shifted_points, void init_device_memory(double **original_points, double **shifted_points,
Matrix *d_original_points, Matrix *d_shifted_points, Matrix *d_kernel_matrix, Matrix *d_original_points, Matrix *d_shifted_points, Matrix *d_kernel_matrix,
Matrix *d_denominator, Matrix *d_new_shift); Matrix *d_denominator, Matrix *d_new_shift, Matrix *d_mean_shift_vector);
//Function calculate_kernel_matrix is a wrapper for the kernel call of the corresponding kernel //Function calculate_kernel_matrix is a wrapper for the kernel call of the corresponding kernel
//"calculate_kernel_matrix_kernel" that calculates the kernel matrix //"calculate_kernel_matrix_kernel" that calculates the kernel matrix
@ -53,6 +53,6 @@ void shift_points(Matrix d_kernel_matrix, Matrix d_original_points, Matrix d_shi
//Function free_device_memory frees device's previously allocated memory //Function free_device_memory frees device's previously allocated memory
void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator, void free_device_memory(Matrix d_original_points, Matrix d_kernel_matrix, Matrix d_denominator,
Matrix d_shifted_points); Matrix d_shifted_points, Matrix d_new_shift);
#endif //SERIAL_GPU_UTILS_H #endif //MEANSHIFT_GPU_UTILS_H

4
mean_shift_cuda_shared_mem/meanshift_kernels.cu

@ -68,8 +68,8 @@ __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix
int col = threadIdx.y; int col = threadIdx.y;
// performs calculations only if thread's indexes are within matrix bounds // performs calculations only if thread's indexes are within matrix bounds
if ((ROW_BLOCK_SIZE * block_row + row) > new_shift.height || if ((ROW_BLOCK_SIZE * block_row + row) >= new_shift.height ||
(COLUMN_BLOCK_SIZE * block_col + col) > new_shift.width){ (COLUMN_BLOCK_SIZE * block_col + col) >= new_shift.width){
return; return;
} }

6
mean_shift_cuda_shared_mem/meanshift_kernels.h

@ -1,5 +1,5 @@
#ifndef SERIAL_KERNELS_H /* Include guard */ #ifndef MEANSHIFT_KERNELS_H /* Include guard */
#define SERIAL_KERNELS_H #define MEANSHIFT_KERNELS_H
/* Structures */ /* Structures */
@ -27,4 +27,4 @@ __global__ void shift_points_kernel(Matrix original_points, Matrix kernel_matrix
__device__ Matrix get_sub_matrix(Matrix A, int row, int col, int ROW_BLOCK_SIZE, __device__ Matrix get_sub_matrix(Matrix A, int row, int col, int ROW_BLOCK_SIZE,
int COLUMN_BLOCK_SIZE); int COLUMN_BLOCK_SIZE);
#endif //SERIAL_KERNELS_H #endif //MEANSHIFT_KERNELS_H

6
mean_shift_cuda_shared_mem/meanshift_utils.h

@ -1,5 +1,5 @@
#ifndef SERIAL_UTILS_H /* Include guard */ #ifndef MEANSHIFT_UTILS_H /* Include guard */
#define SERIAL_UTILS_H #define MEANSHIFT_UTILS_H
#include <stdbool.h> #include <stdbool.h>
@ -32,4 +32,4 @@ void print_matrix(double **array, int rows, int cols);
//If a file already exists new lines are concatenated //If a file already exists new lines are concatenated
void save_matrix(double **matrix, int iteration); void save_matrix(double **matrix, int iteration);
#endif //SERIAL_UTILS_H #endif //MEANSHIFT_UTILS_H
Loading…
Cancel
Save