From 694f53cc0f99c2ec494aa8e13d737a70c46cdea5 Mon Sep 17 00:00:00 2001 From: Apostolof Date: Wed, 3 Oct 2018 22:37:33 +0300 Subject: [PATCH] Implement transition matrix pruning, Fix sparse matrices sizes, Fix output --- serial/coo_sparse_matrix.c | 4 +- serial/csr_sparse_matrix.c | 14 ++-- serial/csr_sparse_matrix.h | 4 +- serial/serial_gs_pagerank.c | 47 +++++++----- serial/serial_gs_pagerank_functions.c | 100 ++++++++++++++------------ serial/serial_gs_pagerank_functions.h | 4 +- 6 files changed, 98 insertions(+), 75 deletions(-) diff --git a/serial/coo_sparse_matrix.c b/serial/coo_sparse_matrix.c index 07d7293..689a0fe 100644 --- a/serial/coo_sparse_matrix.c +++ b/serial/coo_sparse_matrix.c @@ -49,7 +49,7 @@ void transposeSparseMatrix(CooSparseMatrix *sparseMatrix) { void transformToCSR(CooSparseMatrix initialSparseMatrix, CsrSparseMatrix *transformedSparseMatrix) { // Checks if the sizes of the two matrices fit - if (initialSparseMatrix.numberOfNonZeroElements > transformedSparseMatrix->size) { + if (initialSparseMatrix.numberOfNonZeroElements > transformedSparseMatrix->numberOfElements) { printf("Transformed CSR matrix does not have enough space!\n"); exit(EXIT_FAILURE); } @@ -85,8 +85,6 @@ void transformToCSR(CooSparseMatrix initialSparseMatrix, transformedSparseMatrix->rowCumulativeIndexes[i] = last; last = temp; } - - transformedSparseMatrix->numberOfNonZeroElements = initialSparseMatrix.numberOfNonZeroElements; } void cooSparseMatrixVectorMultiplication(CooSparseMatrix sparseMatrix, diff --git a/serial/csr_sparse_matrix.c b/serial/csr_sparse_matrix.c index f1f9005..65e200a 100644 --- a/serial/csr_sparse_matrix.c +++ b/serial/csr_sparse_matrix.c @@ -3,7 +3,7 @@ CsrSparseMatrix initCsrSparseMatrix() { CsrSparseMatrix sparseMatrix; sparseMatrix.size = 0; - sparseMatrix.numberOfNonZeroElements = 0; + sparseMatrix.numberOfElements = 0; sparseMatrix.values = NULL; sparseMatrix.columnIndexes = NULL; @@ -11,17 +11,19 @@ CsrSparseMatrix initCsrSparseMatrix() { 
return sparseMatrix; } -void allocMemoryForCsr(CsrSparseMatrix *sparseMatrix, int numberOfElements) { +void allocMemoryForCsr(CsrSparseMatrix *sparseMatrix, int size, int numberOfElements) { sparseMatrix->values = (double *) malloc(numberOfElements * sizeof(double)); sparseMatrix->columnIndexes = (int *) malloc( numberOfElements * sizeof(int)); sparseMatrix->rowCumulativeIndexes = (int *) malloc( - (numberOfElements + 1) * sizeof(int)); + (size + 1) * sizeof(int)); - for (int i=0; i<numberOfElements+1; ++i) { + for (int i=0; i<size+1; ++i) { sparseMatrix->rowCumulativeIndexes[i] = 0; } - sparseMatrix->size = numberOfElements; + + sparseMatrix->size = size; + sparseMatrix->numberOfElements = numberOfElements; } void zeroOutRow(CsrSparseMatrix *sparseMatrix, int row) { @@ -34,7 +36,7 @@ void zeroOutRow(CsrSparseMatrix *sparseMatrix, int row) { } void zeroOutColumn(CsrSparseMatrix *sparseMatrix, int column) { - for (int i=0; i<sparseMatrix->numberOfNonZeroElements; ++i){ + for (int i=0; i<sparseMatrix->numberOfElements; ++i){ if(sparseMatrix->columnIndexes[i] == column){ sparseMatrix->values[i] = 0; } diff --git a/serial/csr_sparse_matrix.h b/serial/csr_sparse_matrix.h index dddbe49..5588e9a 100644 --- a/serial/csr_sparse_matrix.h +++ b/serial/csr_sparse_matrix.h @@ -12,7 +12,7 @@ // A sparse matrix in compressed SparseRow format. typedef struct csrSparseMatrix { - int size, numberOfNonZeroElements; + int size, numberOfElements; int *rowCumulativeIndexes, *columnIndexes; double *values; } CsrSparseMatrix; @@ -24,7 +24,7 @@ typedef struct csrSparseMatrix { CsrSparseMatrix initCsrSparseMatrix(); // allocMemoryForCsr allocates memory for the elements of the matrix. -void allocMemoryForCsr(CsrSparseMatrix *sparseMatrix, int numberOfElements); +void allocMemoryForCsr(CsrSparseMatrix *sparseMatrix, int size, int numberOfElements); // zeroOutRow assigns a zero value to all the elements of a row in the matrix. 
void zeroOutRow(CsrSparseMatrix *sparseMatrix, int row); diff --git a/serial/serial_gs_pagerank.c b/serial/serial_gs_pagerank.c index ad96d24..6c62b5a 100644 --- a/serial/serial_gs_pagerank.c +++ b/serial/serial_gs_pagerank.c @@ -15,22 +15,30 @@ int main(int argc, char **argv) { initialize(&transitionMatrix, &pagerankVector, &parameters); + // Saves information about the dataset to the output file + { + FILE *outputFile; + outputFile = fopen(parameters.outputFilename, "w"); + + if (outputFile == NULL) { + printf("Error while opening the output file.\n"); + exit(EXIT_FAILURE); + } + + fprintf(outputFile, "Pagerank will run for the dataset %s\n"\ + "Dataset contains %d pages with %d outlinks.\n", + parameters.graphFilename, parameters.numberOfPages, transitionMatrix.size); + + fclose(outputFile); + } + // Starts wall-clock timer gettimeofday (&startwtime, NULL); int* iterations = (int *)malloc(parameters.numberOfPages*sizeof(int)); - + + // Calculates pagerank iterations = pagerank(&transitionMatrix, &pagerankVector, &convergenceStatus, parameters, &maxIterationsForConvergence); - if (parameters.verbose) { - printf(ANSI_COLOR_YELLOW "\n----- RESULTS -----\n" ANSI_COLOR_RESET); - if (convergenceStatus) { - printf(ANSI_COLOR_GREEN "Pagerank converged after %d iterations!\n" \ - ANSI_COLOR_RESET, maxIterationsForConvergence); - } else { - printf(ANSI_COLOR_RED "Pagerank did not converge after max number of" \ - " iterations (%d) was reached!\n" ANSI_COLOR_RESET, maxIterationsForConvergence); - } - } // Stops wall-clock timer gettimeofday (&endwtime, NULL); @@ -38,13 +46,20 @@ int main(int argc, char **argv) { endwtime.tv_sec - startwtime.tv_sec); printf("%s wall clock time = %f\n","Pagerank (Gauss-Seidel method), serial implementation", seq_time); - if (!parameters.history) { - // Always outputs numberOfPages, max_iterations, last pagerank and iterations - // for all pages - savePagerankToFile(parameters.outputFilename, false, pagerankVector, - parameters.numberOfPages, 
iterations, maxIterationsForConvergence); + + printf(ANSI_COLOR_YELLOW "\n----- RESULTS -----\n" ANSI_COLOR_RESET); + if (convergenceStatus) { + printf(ANSI_COLOR_GREEN "Pagerank converged after %d iterations!\n" \ + ANSI_COLOR_RESET, maxIterationsForConvergence); + } else { + printf(ANSI_COLOR_RED "Pagerank did not converge after max number of" \ + " iterations (%d) was reached!\n" ANSI_COLOR_RESET, maxIterationsForConvergence); } + // Saves results to the output file + savePagerankToFile(parameters.outputFilename, iterations, pagerankVector, + parameters.numberOfPages, maxIterationsForConvergence); + free(pagerankVector); destroyCsrSparseMatrix(&transitionMatrix); } diff --git a/serial/serial_gs_pagerank_functions.c b/serial/serial_gs_pagerank_functions.c index 4ead5cf..1bcc2e6 100644 --- a/serial/serial_gs_pagerank_functions.c +++ b/serial/serial_gs_pagerank_functions.c @@ -24,15 +24,18 @@ int* pagerank(CsrSparseMatrix *transitionMatrix, double **pagerankVector, bool *convergenceStatus, Parameters parameters, int* maxIterationsForConvergence) { // Variables declaration int numberOfPages = parameters.numberOfPages; + int *iterations; double delta, *pagerankDifference, *previousPagerankVector, *convergedPagerankVector, *linksFromConvergedPagesPagerankVector; + CsrSparseMatrix originalTransitionMatrix = initCsrSparseMatrix(); CooSparseMatrix linksFromConvergedPages = initCooSparseMatrix(); bool *convergenceMatrix; - int* iterations = (int *)malloc(numberOfPages*sizeof(int)); // Space allocation { size_t sizeofDouble = sizeof(double); + // iterations until each page converged + iterations = (int *) malloc(numberOfPages * sizeof(int)); // pagerankDifference used to calculate delta pagerankDifference = (double *) malloc(numberOfPages * sizeofDouble); // previousPagerankVector holds last iteration's pagerank vector @@ -48,7 +51,16 @@ int* pagerank(CsrSparseMatrix *transitionMatrix, double **pagerankVector, *convergenceStatus = false; // Initialization - 
allocMemoryForCoo(&linksFromConvergedPages, transitionMatrix->numberOfNonZeroElements); + // originalTransitionMatrix used to run pagerank in phases + allocMemoryForCsr(&originalTransitionMatrix, transitionMatrix->size, transitionMatrix->numberOfElements); + memcpy(originalTransitionMatrix.rowCumulativeIndexes, transitionMatrix->rowCumulativeIndexes, + (transitionMatrix->size+1) * sizeof(int)); + memcpy(originalTransitionMatrix.columnIndexes, transitionMatrix->columnIndexes, + transitionMatrix->numberOfElements * sizeof(int)); + memcpy(originalTransitionMatrix.values, transitionMatrix->values, + transitionMatrix->numberOfElements * sizeof(double)); + + allocMemoryForCoo(&linksFromConvergedPages, transitionMatrix->numberOfElements); for (int i=0; ivalues, originalTransitionMatrix.values, + transitionMatrix->numberOfElements * sizeof(double)); + for (int i=0; i 10) { + if (argumentCount < 2 || argumentCount > 12) { validUsage(argumentVector[0]); } (*parameters).numberOfPages = 0; (*parameters).maxIterations = 0; - (*parameters).convergenceCriterion = 1; + (*parameters).convergenceCriterion = 0.001; (*parameters).dampingFactor = 0.85; (*parameters).verbose = false; (*parameters).history = false; @@ -435,7 +456,7 @@ void generateNormalizedTransitionMatrixFromFile(CsrSparseMatrix *transitionMatri // Transposes the temporary transition matrix (P^T). 
transposeSparseMatrix(&tempMatrix); - allocMemoryForCsr(transitionMatrix, numberOfEdges); + allocMemoryForCsr(transitionMatrix, (*parameters).numberOfPages, numberOfEdges); // Transforms the temporary COO matrix to the desired CSR format transformToCSR(tempMatrix, transitionMatrix); destroyCooSparseMatrix(&tempMatrix); @@ -474,47 +495,34 @@ int checkIncrement(int previousIndex, int maxIndex, char *programName) { return ++previousIndex; } -void savePagerankToFile(char *filename, bool append, double *pagerankVector, - int vectorSize, int* iterations, int maxIterationsForConvergence) { +void savePagerankToFile(char *filename, int *iterationsUntilConvergence, + double *pagerankVector, int vectorSize, int iteration) { FILE *outputFile; - if (append) { - outputFile = fopen(filename, "a"); - } else { - outputFile = fopen(filename, "w"); - } + outputFile = fopen(filename, "a"); if (outputFile == NULL) { printf("Error while opening the output file.\n"); return; } - - if(append){ - double sum = 0; - for (int i=0; i