From c4eb6d0c1789e42726b750ef2536d6172295a2b5 Mon Sep 17 00:00:00 2001 From: Apostolof Date: Tue, 18 Sep 2018 01:36:47 +0300 Subject: [PATCH] Init sparse matrices --- serial/Makefile | 4 +- serial/serial_gs_pagerank.c | 9 +- serial/serial_gs_pagerank_functions.c | 194 +++++++++++--------------- serial/serial_gs_pagerank_functions.h | 28 ++-- serial/sparse_matrix.c | 126 +++++++++++++++++ serial/sparse_matrix.h | 29 ++++ 6 files changed, 256 insertions(+), 134 deletions(-) create mode 100644 serial/sparse_matrix.c create mode 100644 serial/sparse_matrix.h diff --git a/serial/Makefile b/serial/Makefile index 78d2e2e..a6ed4bc 100644 --- a/serial/Makefile +++ b/serial/Makefile @@ -7,8 +7,8 @@ CC = gcc RM = rm -f CFLAGS_DEBUG=-O0 -g -I. CFLAGS=-O3 -I. -OBJ=serial_gs_pagerank.o serial_gs_pagerank_functions.o -DEPS=serial_gs_pagerank_functions.h +OBJ=serial_gs_pagerank.o serial_gs_pagerank_functions.o sparse_matrix.o +DEPS=serial_gs_pagerank_functions.h sparse_matrix.h # ========================================== # TARGETS diff --git a/serial/serial_gs_pagerank.c b/serial/serial_gs_pagerank.c index 5b056b9..355915f 100644 --- a/serial/serial_gs_pagerank.c +++ b/serial/serial_gs_pagerank.c @@ -1,18 +1,21 @@ #include #include "serial_gs_pagerank_functions.h" +#include "sparse_matrix.h" struct timeval startwtime, endwtime; double seq_time; int main(int argc, char **argv) { - int **directedWebGraph; - double **transitionMatrix, *pagerankVector; + SparseMatrix transitionMatrix; + double *pagerankVector; Parameters parameters; + transitionMatrix = createSparseMatrix(); + parseArguments(argc, argv, ¶meters); - initialize(&directedWebGraph, &transitionMatrix, &pagerankVector, ¶meters); + initialize(&transitionMatrix, &pagerankVector, ¶meters); // Starts wall-clock timer gettimeofday (&startwtime, NULL); diff --git a/serial/serial_gs_pagerank_functions.c b/serial/serial_gs_pagerank_functions.c index 88d6d4d..99ef587 100644 --- a/serial/serial_gs_pagerank_functions.c +++ b/serial/serial_gs_pagerank_functions.c @@ -9,10 +9,11 @@ const char *ARGUMENT_OUTPUT_FILENAME = "-o"; const int NUMERICAL_BASE = 10; char *DEFAULT_OUTPUT_FILENAME = "pagerank_output"; +const int MAX_PAGE_LINKS_TEXT_SIZE = 4096; // ==================== PAGERANK ==================== -int pagerank(double ***transitionMatrix, double **pagerankVector, Parameters parameters) { +int pagerank(SparseMatrix *transitionMatrix, double **pagerankVector, Parameters parameters) { int iterations = 0; double delta, *vectorDifference = (double *) malloc(parameters.numberOfPages * sizeof(double)), @@ -40,10 +41,13 @@ int pagerank(double ***transitionMatrix, double **pagerankVector, Parameters par do { memcpy(previousPagerankVector, *pagerankVector, parameters.numberOfPages * sizeof(double)); - matrixVectorMultiplication(*transitionMatrix, previousPagerankVector, - linksFromConvergedPagesPagerankVector, convergedPagerankVector, + matrixVectorMultiplication(transitionMatrix, previousPagerankVector, pagerankVector, parameters.numberOfPages, parameters.dampingFactor); + for (int i=0; ivalue : 0; } - // Zeros out CN and CC sub-matrices - (*transitionMatrix)[i][j] = 0; - // Zeros out NC sub-matrix - (*transitionMatrix)[j][i] = 0; + deleteElement(transitionMatrix, i, j); + deleteElement(transitionMatrix, j, i); } double sum = 0; @@ -104,14 +107,14 @@ int pagerank(double ***transitionMatrix, double **pagerankVector, Parameters par * from the file and creates the initial transition probability distribution * matrix. */ -void initialize(int ***directedWebGraph, double ***transitionMatrix, +void initialize(SparseMatrix *transitionMatrix, double **pagerankVector, Parameters *parameters) { // Reads web graph from file if ((*parameters).verbose) { printf("----- Reading graph from file -----\n"); } - readGraphFromFile(directedWebGraph, parameters); + generateNormalizedTransitionMatrixFromFile(transitionMatrix, parameters); // Outputs the algorithm parameters to the console if ((*parameters).verbose) { @@ -135,49 +138,8 @@ void initialize(int ***directedWebGraph, double ***transitionMatrix, (*pagerankVector)[i] = webUniformProbability; } - // Generates the initial transition matrix (matrix P). - generateNormalizedTransitionMatrix(transitionMatrix, *directedWebGraph, *parameters); // Transposes the transition matrix (P^T). - transposeMatrix(transitionMatrix, (*parameters).numberOfPages, (*parameters).numberOfPages); -} - -/* - * generateNormalizedTransitionMatrix generates the normalized transition matrix - * from the graph data (matrix P'). -*/ -void generateNormalizedTransitionMatrix(double ***transitionMatrix, - int **directedWebGraph, Parameters parameters) { - // Allocates memory for the transitionMatrix rows - (*transitionMatrix) = (double **) malloc(parameters.numberOfPages * sizeof(double *)); - - for (int i=0; ielements); + + (*parameters).numberOfPages = pageIndex + 1; + + int currentRow = transitionMatrix->firstElement->rowIndex; + SparseMatrixElement *startElement = transitionMatrix->firstElement; + while(true) { + int pageOutdegree = 1; + SparseMatrixElement *currentElement = startElement->nextElement; + + // Calculates current page's outdegree + while (currentElement != NULL) { + if (currentElement->rowIndex == currentRow) { + ++pageOutdegree; + currentElement = currentElement->nextElement; + } else { break; } } + + // Assigns the value 1/outdegree to current page's columns + currentElement = startElement; + for (int i=0; irowIndex == currentRow) { + currentElement->value = 1. / pageOutdegree; + currentElement = currentElement->nextElement; + } else { + break; + } + } + + // Reached the last element; + if (currentElement == NULL) { + break; + } + + startElement = currentElement; + currentRow = startElement->rowIndex; } fclose(graphFile); diff --git a/serial/serial_gs_pagerank_functions.h b/serial/serial_gs_pagerank_functions.h index 9581c7b..853f8a0 100644 --- a/serial/serial_gs_pagerank_functions.h +++ b/serial/serial_gs_pagerank_functions.h @@ -7,6 +7,8 @@ #include #include +#include "sparse_matrix.h" + /* * Constant strings that store the command line options available. */ @@ -23,6 +25,8 @@ extern const int NUMERICAL_BASE; // Default filename used for the output. extern char *DEFAULT_OUTPUT_FILENAME; +extern const int MAX_PAGE_LINKS_TEXT_SIZE; + // Declares a data structure to conveniently hold the algorithm's parameters. typedef struct parameters { int numberOfPages, maxIterations; @@ -31,6 +35,9 @@ typedef struct parameters { char *outputFilename, *graphFilename; } Parameters; +//extern typedef SparseMatrixElement; +//extern typedef SparseMatrix; + // Function validUsage outputs the correct way to use the program with command // line arguments. void validUsage(char *programName); @@ -45,26 +52,17 @@ void parseArguments(int argumentCount, char **argumentVector, Parameters *parame // Function readGraphFromFile loads adjacency matrix, that represents the web // graph, stored in the file provided in the command line arguments to the array // directedWebGraph. -void readGraphFromFile(int ***directedWebGraph, Parameters *parameters); +void generateNormalizedTransitionMatrixFromFile(SparseMatrix *transitionMatrix, Parameters *parameters); // Function savePagerankToFile appends or overwrites the pagerank vector // "pagerankVector" to the file with the filename supplied in the arguments void savePagerankToFile(char *filename, bool append, double *pagerankVector, int vectorSize); -// Function generateNormalizedTransitionMatrix generates the normalized -// transition matrix from the web graph data. -void generateNormalizedTransitionMatrix(double ***transitionMatrix, - int **directedWebGraph, Parameters parameters); - -// Function transposeMatrix transposes a matrix. -void transposeMatrix(double ***matrix, int rows, int columns); - // Function initialize allocates required memory for arrays, reads the dataset // from the file and creates the transition probability distribution matrix. void initialize( - int ***directedWebGraph, /*This is matrix G (web graph)*/ - double ***transitionMatrix, /*This is matrix A (transition probability distribution matrix)*/ + SparseMatrix *transitionMatrix, /*This is matrix A (transition probability distribution matrix)*/ double **pagerankVector, /*This is the resulting pagerank vector*/ Parameters *parameters ); @@ -74,13 +72,13 @@ double vectorNorm(double *vector, int vectorSize); // Function matrixVectorMultiplication calculates the product of the // multiplication between a matrix and the a vector. -void matrixVectorMultiplication(double **transitionMatrix, double *previousPagerankVector, - double *linksFromConvergedPagesPagerankVector, double *convergedPagerankVector, - double **pagerankVector, int vectorSize, double dampingFactor); +void matrixVectorMultiplication(SparseMatrix *transitionMatrix, + double *previousPagerankVector, double **pagerankVector, int vectorSize, + double dampingFactor); // Function pagerank iteratively calculates the pagerank of each page until // either the convergence criterion is met or the maximum number of iterations // is reached. -int pagerank(double ***transitionMatrix, double **pagerankVector, Parameters parameters); +int pagerank(SparseMatrix *transitionMatrix, double **pagerankVector, Parameters parameters); #endif // SERIAL_GS_PAGERANK_FUNCTIONS_H \ No newline at end of file diff --git a/serial/sparse_matrix.c b/serial/sparse_matrix.c new file mode 100644 index 0000000..ba77587 --- /dev/null +++ b/serial/sparse_matrix.c @@ -0,0 +1,126 @@ +#include "sparse_matrix.h" + +SparseMatrix createSparseMatrix() { + SparseMatrix sparseMatrix; + sparseMatrix.elements = 0; + sparseMatrix.firstElement = NULL; + return sparseMatrix; +} + +void apendElement(SparseMatrix *sparseMatrix, double value, int row, int column) { + // Creates the new element + SparseMatrixElement *newElement = (SparseMatrixElement *) malloc(sizeof(SparseMatrixElement)); + newElement->value = value; + newElement->rowIndex = row; + newElement->columnIndex = column; + newElement->nextElement = NULL; + + if (sparseMatrix->firstElement == NULL) { + // Sparse matrix is empty, this is the first element + sparseMatrix->firstElement = newElement; + } else { + //Gets last element of the matrix + SparseMatrixElement *lastElement = sparseMatrix->firstElement; + while (lastElement->nextElement != NULL) { + lastElement = lastElement->nextElement; + } + + lastElement->nextElement = newElement; + } + + sparseMatrix->elements = sparseMatrix->elements + 1; +} + +bool deleteElement(SparseMatrix *sparseMatrix, int row, int column) { + if (sparseMatrix->elements == 0) { + // Matrix is empty, nothing can be deleted + return false; + } else if (sparseMatrix->elements == 1) { + // Matrix has one element. Deletes it. + free(sparseMatrix->firstElement); + sparseMatrix->firstElement = NULL; + sparseMatrix->elements = sparseMatrix->elements - 1; + return true; + } + + SparseMatrixElement *currentElement = sparseMatrix->firstElement; + + if (currentElement->rowIndex == row && currentElement->columnIndex == column) { + sparseMatrix->firstElement = currentElement->nextElement; + free(currentElement); + sparseMatrix->elements = sparseMatrix->elements - 1; + return true; + } + + // Matrix has multiple elements. Finds the first element that has the coordinates + // (row,column) and deletes it. + for (int i=0; ielements - 1; ++i) { + SparseMatrixElement *nextElement = currentElement->nextElement; + if (nextElement->rowIndex == row && nextElement->columnIndex == column) { + currentElement->nextElement = nextElement->nextElement; + free(nextElement); + sparseMatrix->elements = sparseMatrix->elements - 1; + return true; + } else { + currentElement = currentElement->nextElement; + } + } +} + +SparseMatrixElement *getElement(SparseMatrix sparseMatrix, int row, int column) { + SparseMatrixElement *currentElement = sparseMatrix.firstElement; + do { + if (currentElement->rowIndex == row && currentElement->columnIndex == column) { + return currentElement; + } + currentElement = currentElement->nextElement; + } while (currentElement != NULL); + + return NULL; +} + +void transposeSparseMatrix(SparseMatrix *sparseMatrix) { + SparseMatrixElement *currentElement = sparseMatrix->firstElement; + for (int i=0; ielements; ++i) { + int temp = currentElement->rowIndex; + currentElement->rowIndex = currentElement->columnIndex; + currentElement->columnIndex = temp; + + currentElement = currentElement->nextElement; + } +} + +void sparseMatrixVectorMultiplication(SparseMatrix sparseMatrix, + double *vector, double **product, int vectorSize) { + // Initializes the elements of the product vector to zero + for (int i=0; irowIndex, column = element->columnIndex; + + if (row >= vectorSize) { + printf("Error at sparseMatrixVectorMultiplication. Matrix has more rows than vector!\n"); + printf("row = %d\n", row); + exit(EXIT_FAILURE); + } + + (*product)[row] = (*product)[row] + element->value * vector[column]; + element = element->nextElement; + } +} + +void printSparseMatrix(SparseMatrix sparseMatrix) { + if (sparseMatrix.elements == 0) { + return; + } + + SparseMatrixElement *currentElement = sparseMatrix.firstElement; + for (int i=0; irowIndex, + currentElement->columnIndex, currentElement->value); + currentElement = currentElement->nextElement; + } +} \ No newline at end of file diff --git a/serial/sparse_matrix.h b/serial/sparse_matrix.h new file mode 100644 index 0000000..cfa7f8e --- /dev/null +++ b/serial/sparse_matrix.h @@ -0,0 +1,29 @@ +#ifndef SPARSE_MATRIX_H /* Include guard */ +#define SPARSE_MATRIX_H + +#include +#include +#include +#include + +typedef struct sparseMatrixElement { + double value; + int rowIndex, columnIndex; + struct sparseMatrixElement *nextElement; +} SparseMatrixElement; + +typedef struct sparseMatrix { + int elements; + SparseMatrixElement *firstElement; +} SparseMatrix; + +SparseMatrix createSparseMatrix(); +void apendElement(SparseMatrix *sparseMatrix, double value, int row, int column); +bool deleteElement(SparseMatrix *sparseMatrix, int row, int column); +SparseMatrixElement *getElement(SparseMatrix sparseMatrix, int row, int column); +void transposeSparseMatrix(SparseMatrix *sparseMatrix); +void sparseMatrixVectorMultiplication(SparseMatrix sparseMatrix, double *vector, + double **product, int vectorSize); +void printSparseMatrix(SparseMatrix sparseMatrix); + +#endif // SPARSE_MATRIX_H \ No newline at end of file