Exercise 3 for the course "Parallel and Distributed Systems" of the THMMY department at AUTH.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

18 lines
732 B

#include "meanshift_kernels.h"
#include <stdio.h>
// Matrix product kernel: output = matrix1 * matrix2.
//
// Each thread computes exactly one element of output. The thread's global
// x index selects the output row and its global y index selects the output
// column, so the launch grid must cover at least output.height threads
// along x and output.width threads along y. Threads that fall outside the
// output matrix do nothing, so the grid may be rounded up freely.
//
// All three matrices are addressed as flat row-major arrays:
//   elements[row * width + col]
//
// Assumptions (not checked here -- confirm against the caller):
//   - matrix1.width equals the number of rows of matrix2
//   - output is matrix1.height x matrix2.width
//   - the elements pointers refer to device-accessible memory
__global__ void multiply_kernel(Matrix matrix1, Matrix matrix2, Matrix output){
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    int col = blockIdx.y * blockDim.y + threadIdx.y;

    // Guard each coordinate separately. Checking only row + col against
    // height * width lets through threads whose row or col alone is out of
    // range; those threads would read past the inputs and either write past
    // the output buffer or overwrite a different, valid cell.
    if (row >= output.height || col >= output.width){
        return;
    }

    // Accumulate the dot product of row `row` of matrix1 and column `col`
    // of matrix2.
    double cell_value = 0;
    for (int element_index = 0; element_index < matrix1.width; ++element_index){
        cell_value += matrix1.elements[row * matrix1.width + element_index]
            * matrix2.elements[element_index * matrix2.width + col];
    }

    output.elements[row * output.width + col] = cell_value;
}