diff --git a/pthread/Makefile b/pthread/Makefile new file mode 100644 index 0000000..3daf8c8 --- /dev/null +++ b/pthread/Makefile @@ -0,0 +1,37 @@ +SHELL := /bin/bash + +# ============================================ +# COMMANDS + +CC = gcc -std=gnu99 -pthread +RM = rm -f +CFLAGS_DEBUG=-O0 -ggdb3 -Wall -I. +CFLAGS=-O3 -Wall -I. +OBJ=serial_gs_pagerank.o serial_gs_pagerank_functions.o coo_sparse_matrix.o csr_sparse_matrix.o +DEPS=serial_gs_pagerank_functions.h coo_sparse_matrix.h csr_sparse_matrix.h + +# ========================================== +# TARGETS + +EXECUTABLES = pagerank.out + +.PHONY: all clean + +all: $(EXECUTABLES) + +# ========================================== +# DEPENDENCIES (HEADERS) + +%.o: %.c $(DEPS) + $(CC) -c -o $@ $< $(CFLAGS) + +.PRECIOUS: $(EXECUTABLES) $(OBJ) + +# ========================================== +# EXECUTABLE (MAIN) + +$(EXECUTABLES): $(OBJ) + $(CC) -o $@ $^ $(CFLAGS) + +clean: + $(RM) *.o *~ $(EXECUTABLES) diff --git a/pthread/coo_sparse_matrix.c b/pthread/coo_sparse_matrix.c new file mode 100644 index 0000000..07d7293 --- /dev/null +++ b/pthread/coo_sparse_matrix.c @@ -0,0 +1,132 @@ +#include "coo_sparse_matrix.h" + +CooSparseMatrix initCooSparseMatrix() { + CooSparseMatrix sparseMatrix; + sparseMatrix.size = 0; + sparseMatrix.numberOfNonZeroElements = 0; + sparseMatrix.elements = NULL; + return sparseMatrix; +} + +void allocMemoryForCoo(CooSparseMatrix *sparseMatrix, int numberOfElements) { + sparseMatrix->elements = (CooSparseMatrixElement **) malloc( + numberOfElements * sizeof(CooSparseMatrixElement *)); + sparseMatrix->size = numberOfElements; +} + +void addElement(CooSparseMatrix *sparseMatrix, double value, int row, int column) { + // Checks if there is enough space allocated + if (sparseMatrix->numberOfNonZeroElements == sparseMatrix->size) { + printf("Number of non zero elements exceeded size of matrix!\n"); + exit(EXIT_FAILURE); + } + + // Creates the new element + CooSparseMatrixElement *newElement = (CooSparseMatrixElement *) malloc( + sizeof(CooSparseMatrixElement)); + newElement->value = value; + newElement->rowIndex = row; + newElement->columnIndex = column; + + // Adds the new element to the first empty (NULL) address of the matrix + sparseMatrix->elements[sparseMatrix->numberOfNonZeroElements] = newElement; + sparseMatrix->numberOfNonZeroElements = sparseMatrix->numberOfNonZeroElements + 1; +} + +void transposeSparseMatrix(CooSparseMatrix *sparseMatrix) { + for (int i=0; inumberOfNonZeroElements; ++i) { + CooSparseMatrixElement *element = sparseMatrix->elements[i]; + int tempRow = element->rowIndex; + element->rowIndex = element->columnIndex; + element->columnIndex = tempRow; + } +} + +/* + * This function is a port of the one found here: + * https://github.com/scipy/scipy/blob/3b36a57/scipy/sparse/sparsetools/coo.h#L34 +*/ +void transformToCSR(CooSparseMatrix initialSparseMatrix, + CsrSparseMatrix *transformedSparseMatrix) { + // Checks if the sizes of the two matrices fit + if (initialSparseMatrix.numberOfNonZeroElements > transformedSparseMatrix->size) { + printf("Transformed CSR matrix does not have enough space!\n"); + exit(EXIT_FAILURE); + } + + // Calculates the elements per row + for (int i=0; irowIndex; + transformedSparseMatrix->rowCumulativeIndexes[rowIndex] = + transformedSparseMatrix->rowCumulativeIndexes[rowIndex] + 1; + } + + // Cumulative sums the non zero elements per row + for (int i=0, sum=0; isize+1; ++i){ + int temp = transformedSparseMatrix->rowCumulativeIndexes[i]; + transformedSparseMatrix->rowCumulativeIndexes[i] = sum; + sum += temp; + } + + // Copies the values and columns of the elements + for (int i=0; irowIndex; + int destinationIndex = transformedSparseMatrix->rowCumulativeIndexes[row]; + + transformedSparseMatrix->columnIndexes[destinationIndex] = initialSparseMatrix.elements[i]->columnIndex; + transformedSparseMatrix->values[destinationIndex] = initialSparseMatrix.elements[i]->value; + + transformedSparseMatrix->rowCumulativeIndexes[row]++; + } + + // Fixes the cumulative sum + for (int i=0, last=0; i<=transformedSparseMatrix->size; i++){ + int temp = transformedSparseMatrix->rowCumulativeIndexes[i]; + transformedSparseMatrix->rowCumulativeIndexes[i] = last; + last = temp; + } + + transformedSparseMatrix->numberOfNonZeroElements = initialSparseMatrix.numberOfNonZeroElements; +} + +void cooSparseMatrixVectorMultiplication(CooSparseMatrix sparseMatrix, + double *vector, double **product, int vectorSize) { + // Initializes the elements of the product vector to zero + for (int i=0; irowIndex, column = element->columnIndex; + + if (row >= vectorSize) { + printf("Error at sparseMatrixVectorMultiplication. Matrix has more rows than vector!\n"); + printf("row = %d\n", row); + exit(EXIT_FAILURE); + } + + (*product)[row] = (*product)[row] + element->value * vector[column]; + } +} + +void destroyCooSparseMatrix(CooSparseMatrix *sparseMatrix) { + for (int i=0; inumberOfNonZeroElements; ++i) { + free(sparseMatrix->elements[i]); + } + free(sparseMatrix->elements); +} + +void printCooSparseMatrix(CooSparseMatrix sparseMatrix) { + if (sparseMatrix.numberOfNonZeroElements == 0) { + return; + } + + CooSparseMatrixElement *element; + for (int i=0; irowIndex, element->columnIndex, + element->value); + } +} \ No newline at end of file diff --git a/pthread/coo_sparse_matrix.h b/pthread/coo_sparse_matrix.h new file mode 100644 index 0000000..0a34b7e --- /dev/null +++ b/pthread/coo_sparse_matrix.h @@ -0,0 +1,60 @@ +#ifndef COO_SPARSE_MATRIX_H /* Include guard */ +#define COO_SPARSE_MATRIX_H + +/* ===== INCLUDES ===== */ + +#include +#include +#include +#include + +#include "csr_sparse_matrix.h" + +/* ===== STRUCTURES ===== */ + +// One element of the coordinate formated sparse matrix. +typedef struct cooSparseMatrixElement { + double value; + int rowIndex, columnIndex; +} CooSparseMatrixElement; + +// A sparse matrix in COOrdinate format (aka triplet format). +typedef struct cooSparseMatrix { + int size, numberOfNonZeroElements; + CooSparseMatrixElement **elements; +} CooSparseMatrix; + +/* ===== FUNCTION DEFINITIONS ===== */ + +// initCooSparseMatrix creates and initializes the members of a CooSparseMatrix +// structure instance. +CooSparseMatrix initCooSparseMatrix(); + +//allocMemoryForCoo allocates memory for the elements of the matrix. +void allocMemoryForCoo(CooSparseMatrix *sparseMatrix, int numberOfElements); + +// addElement adds an element representing the triplet passed in the arguments +// to the first empty address of the space allocated for the elements. +void addElement(CooSparseMatrix *sparseMatrix, double value, int row, + int column); + +// transposeSparseMatrix transposes the matrix. +void transposeSparseMatrix(CooSparseMatrix *sparseMatrix); + +// transformToCSR transforms the sparse matrix representation format from COO +// to CSR. +void transformToCSR(CooSparseMatrix initialSparseMatrix, + CsrSparseMatrix *transformedSparseMatrix); + +// cooSparseMatrixVectorMultiplication calculates the product of a +// CooSparseMatrix and a vector. +void cooSparseMatrixVectorMultiplication(CooSparseMatrix sparseMatrix, + double *vector, double **product, int vectorSize); + +// destroyCooSparseMatrix frees all space used by the CooSparseMatrix. +void destroyCooSparseMatrix(CooSparseMatrix *sparseMatrix); + +// printCooSparseMatrix prints the values of a CooSparseMatrix. +void printCooSparseMatrix(CooSparseMatrix sparseMatrix); + +#endif // COO_SPARSE_MATRIX_H \ No newline at end of file diff --git a/pthread/csr_sparse_matrix.c b/pthread/csr_sparse_matrix.c new file mode 100644 index 0000000..f1f9005 --- /dev/null +++ b/pthread/csr_sparse_matrix.c @@ -0,0 +1,92 @@ +#include "csr_sparse_matrix.h" + +CsrSparseMatrix initCsrSparseMatrix() { + CsrSparseMatrix sparseMatrix; + sparseMatrix.size = 0; + sparseMatrix.numberOfNonZeroElements = 0; + + sparseMatrix.values = NULL; + sparseMatrix.columnIndexes = NULL; + sparseMatrix.rowCumulativeIndexes = NULL; + return sparseMatrix; +} + +void allocMemoryForCsr(CsrSparseMatrix *sparseMatrix, int numberOfElements) { + sparseMatrix->values = (double *) malloc(numberOfElements * sizeof(double)); + sparseMatrix->columnIndexes = (int *) malloc( + numberOfElements * sizeof(int)); + sparseMatrix->rowCumulativeIndexes = (int *) malloc( + (numberOfElements + 1) * sizeof(int)); + + for (int i=0; irowCumulativeIndexes[i] = 0; + } + sparseMatrix->size = numberOfElements; +} + +void zeroOutRow(CsrSparseMatrix *sparseMatrix, int row) { + // Gets start and end indexes of the row's elements + int startIndex = sparseMatrix->rowCumulativeIndexes[row], + endIndex = sparseMatrix->rowCumulativeIndexes[row+1]; + for (int i=startIndex; ivalues[i] = 0; + } +} + +void zeroOutColumn(CsrSparseMatrix *sparseMatrix, int column) { + for (int i=0; inumberOfNonZeroElements; ++i){ + if(sparseMatrix->columnIndexes[i] == column){ + sparseMatrix->values[i] = 0; + } + } +} + +void csrSparseMatrixVectorMultiplication(CsrSparseMatrix sparseMatrix, + double *vector, double **product, int vectorSize) { + // Initializes the elements of the product vector to zero + for (int i=0; ivalues); + free(sparseMatrix->rowCumulativeIndexes); + free(sparseMatrix->columnIndexes); +} + +void printCsrSparseMatrix(CsrSparseMatrix sparseMatrix) { + if (sparseMatrix.size == 0) { + return; + } + + for (int i=0; i +#include +#include +#include + +/* ===== STRUCTURES ===== */ + +// A sparse matrix in compressed SparseRow format. +typedef struct csrSparseMatrix { + int size, numberOfNonZeroElements; + int *rowCumulativeIndexes, *columnIndexes; + double *values; +} CsrSparseMatrix; + +/* ===== FUNCTION DEFINITIONS ===== */ + +// initCsrSparseMatrix creates and initializes the members of a CsrSparseMatrix +// structure instance. +CsrSparseMatrix initCsrSparseMatrix(); + +// allocMemoryForCsr allocates memory for the elements of the matrix. +void allocMemoryForCsr(CsrSparseMatrix *sparseMatrix, int numberOfElements); + +// zeroOutRow assigns a zero value to all the elements of a row in the matrix. +void zeroOutRow(CsrSparseMatrix *sparseMatrix, int row); + +// zeroOutColumn assigns a zero value to all the elements of a column in the +// matrix. +void zeroOutColumn(CsrSparseMatrix *sparseMatrix, int column); + +// csrSparseMatrixVectorMultiplication calculates the product of a +// CsrSparseMatrix and a vector. +void csrSparseMatrixVectorMultiplication(CsrSparseMatrix sparseMatrix, + double *vector, double **product, int vectorSize); + +// destroyCsrSparseMatrix frees all space used by the CsrSparseMatrix. +void destroyCsrSparseMatrix(CsrSparseMatrix *sparseMatrix); + +// printCsrSparseMatrix prints the values of a CsrSparseMatrix. +void printCsrSparseMatrix(CsrSparseMatrix sparseMatrix); + +#endif // CSR_SPARSE_MATRIX_H \ No newline at end of file diff --git a/pthread/pagerank.out b/pthread/pagerank.out new file mode 100644 index 0000000..5e0da74 Binary files /dev/null and b/pthread/pagerank.out differ diff --git a/pthread/serial_gs_pagerank.c b/pthread/serial_gs_pagerank.c new file mode 100644 index 0000000..ccf25d1 --- /dev/null +++ b/pthread/serial_gs_pagerank.c @@ -0,0 +1,42 @@ +#include + +#include "serial_gs_pagerank_functions.h" + +struct timeval startwtime, endwtime; + +int main(int argc, char **argv) { + CsrSparseMatrix transitionMatrix = initCsrSparseMatrix(); + double *pagerankVector; + bool convergenceStatus; + Parameters parameters; + + parseArguments(argc, argv, ¶meters); + + initialize(&transitionMatrix, &pagerankVector, ¶meters); + + // Starts wall-clock timer + gettimeofday (&startwtime, NULL); + + int iterations = pagerank(&transitionMatrix, &pagerankVector, + &convergenceStatus, parameters); + if (parameters.verbose) { + printf(ANSI_COLOR_YELLOW "\n----- RESULTS -----\n" ANSI_COLOR_RESET); + if (convergenceStatus) { + printf(ANSI_COLOR_GREEN "Pagerank converged after %d iterations!\n" \ + ANSI_COLOR_RESET, iterations); + } else { + printf(ANSI_COLOR_RED "Pagerank did not converge after max number of" \ + " iterations (%d) was reached!\n" ANSI_COLOR_RESET, iterations); + } + } + + // Stops wall-clock timer + gettimeofday (&endwtime, NULL); + double seq_time = (double)((endwtime.tv_usec - startwtime.tv_usec)/1.0e6 + + endwtime.tv_sec - startwtime.tv_sec); + printf("%s wall clock time = %f\n","Pagerank (Gauss-Seidel method), serial implementation", + seq_time); + + free(pagerankVector); + destroyCsrSparseMatrix(&transitionMatrix); +} diff --git a/pthread/serial_gs_pagerank_functions.c b/pthread/serial_gs_pagerank_functions.c new file mode 100644 index 0000000..3327756 --- /dev/null +++ b/pthread/serial_gs_pagerank_functions.c @@ -0,0 +1,610 @@ +/* ===== INCLUDES ===== */ + +#include "serial_gs_pagerank_functions.h" +#include + +/* ===== CONSTANTS ===== */ + +const char *ARGUMENT_CONVERGENCE_TOLERANCE = "-c"; +const char *ARGUMENT_MAX_ITERATIONS = "-m"; +const char *ARGUMENT_DAMPING_FACTOR = "-a"; +const char *ARGUMENT_VERBAL_OUTPUT = "-v"; +const char *ARGUMENT_OUTPUT_HISTORY = "-h"; +const char *ARGUMENT_OUTPUT_FILENAME = "-o"; + +const int NUMERICAL_BASE = 10; +char *DEFAULT_OUTPUT_FILENAME = "pagerank_output"; +const int FILE_READ_BUFFER_SIZE = 4096; + +const int CONVERGENCE_CHECK_ITERATION_PERIOD = 2; +const int SPARSITY_INCREASE_ITERATION_PERIOD = 10; + +/* ===== THREAD STUFF ====== */ + +pthread_mutex_t Q = PTHREAD_MUTEX_INITIALIZER;; + +typedef struct threadArgs{ + CsrSparseMatrix* transitionMatrix; + double* pagerankVector; + double* previousPagerankVector; + int numberOfPages; + double webUniformProbability; + double *linksFromConvergedPagesPagerankVector; + double *convergedPagerankVector; + int position; + double dF; +}threadArgs; + +/* ===== FUNCTIONS ===== */ + +int pagerank(CsrSparseMatrix *transitionMatrix, double **pagerankVector, + bool *convergenceStatus, Parameters parameters) { + // Variables declaration + int iterations = 0, numberOfPages = parameters.numberOfPages; + double delta, *pagerankDifference, *previousPagerankVector, + *convergedPagerankVector, *linksFromConvergedPagesPagerankVector; + CooSparseMatrix linksFromConvergedPages = initCooSparseMatrix(); + bool *convergenceMatrix; + + + int threadNum = parameters.numberOfPages; + pthread_t* threads = (pthread_t *)malloc(threadNum*sizeof(pthread_t)); + + // Space allocation + { + size_t sizeofDouble = sizeof(double); + // pagerankDifference used to calculate delta + pagerankDifference = (double *) malloc(numberOfPages * sizeofDouble); + // previousPagerankVector holds last iteration's pagerank vector + previousPagerankVector = (double *) malloc(numberOfPages * sizeofDouble); + // convergedPagerankVector is the pagerank vector of converged pages only + convergedPagerankVector = (double *) malloc(numberOfPages * sizeofDouble); + // linksFromConvergedPagesPagerankVector holds the partial sum of the + // pagerank vector, that describes effect of the links from converged + // pages to non converged pages + linksFromConvergedPagesPagerankVector = (double *) malloc(numberOfPages * sizeofDouble); + // convergenceMatrix indicates which pages have converged + convergenceMatrix = (bool *) malloc(numberOfPages * sizeof(bool)); + *convergenceStatus = false; + + // Initialization + allocMemoryForCoo(&linksFromConvergedPages, transitionMatrix->numberOfNonZeroElements); + for (int i=0; irowCumulativeIndexes[i], + rowEndIndex = transitionMatrix->rowCumulativeIndexes[i+1]; + if (rowEndIndex > rowStartIndex) { + // This row (page) has non zero elements (out-links) + for (int j=rowStartIndex; jcolumnIndexes[j]; + if (convergenceMatrix[pageLinksTo] == false){ + // Link exists, adds element to the vector + addElement(&linksFromConvergedPages, + transitionMatrix->values[j], i, pageLinksTo); + } + } + } + + // Increases sparsity of the transition matrix by zeroing + // out elements that correspond to converged pages + zeroOutRow(transitionMatrix, i); + zeroOutColumn(transitionMatrix, i); + + // Builds the new linksFromConvergedPagesPagerankVector + cooSparseMatrixVectorMultiplication(linksFromConvergedPages, + *pagerankVector, &linksFromConvergedPagesPagerankVector, + numberOfPages); + } + } + free(newlyConvergedPages); + } + + ++iterations; + // Outputs information about this iteration + if (iterations%2) { + printf(ANSI_COLOR_BLUE "Iteration %d: delta = %f\n" ANSI_COLOR_RESET, iterations, delta); + } else { + printf(ANSI_COLOR_CYAN "Iteration %d: delta = %f\n" ANSI_COLOR_RESET, iterations, delta); + } + } while (!*convergenceStatus && (parameters.maxIterations == 0 || + iterations < parameters.maxIterations)); + + if (!parameters.history) { + // Always outputs last pagerank vector to file + savePagerankToFile(parameters.outputFilename, false, *pagerankVector, + numberOfPages, iterations); + } + + // Frees memory + free(pagerankDifference); + free(previousPagerankVector); + free(convergedPagerankVector); + free(linksFromConvergedPagesPagerankVector); + free(convergenceMatrix); + destroyCooSparseMatrix(&linksFromConvergedPages); + + return iterations; +} + +/* + * initialize allocates required memory for arrays, reads the web graph from the + * from the file and creates the initial transition probability distribution + * matrix. +*/ +void initialize(CsrSparseMatrix *transitionMatrix, + double **pagerankVector, Parameters *parameters) { + + // Reads web graph from file + if ((*parameters).verbose) { + printf(ANSI_COLOR_YELLOW "----- Reading graph from file -----\n" ANSI_COLOR_RESET); + } + generateNormalizedTransitionMatrixFromFile(transitionMatrix, parameters); + + // Outputs the algorithm parameters to the console + if ((*parameters).verbose) { + printf(ANSI_COLOR_YELLOW "\n----- Running with parameters -----\n" ANSI_COLOR_RESET\ + "Number of pages: %d", (*parameters).numberOfPages); + if (!(*parameters).maxIterations) { + printf("\nMaximum number of iterations: inf"); + } else { + printf("\nMaximum number of iterations: %d", (*parameters).maxIterations); + } + printf("\nConvergence criterion: %f" \ + "\nDamping factor: %f" \ + "\nGraph filename: %s\n", (*parameters).convergenceCriterion, + (*parameters).dampingFactor, (*parameters).graphFilename); + } + + // Allocates memory for the pagerank vector + (*pagerankVector) = (double *) malloc((*parameters).numberOfPages * sizeof(double)); + double webUniformProbability = 1. / (*parameters).numberOfPages; + for (int i=0; i<(*parameters).numberOfPages; ++i) { + (*pagerankVector)[i] = webUniformProbability; + } +} + +// ==================== MATH UTILS ==================== + +/* + * calculateNextPagerank calculates the product of the multiplication + * between a matrix and the a vector in a cheap way. +*/ +void calculateNextPagerank(CsrSparseMatrix *transitionMatrix, + double *previousPagerankVector, double **pagerankVector, + double *linksFromConvergedPagesPagerankVector, + double *convergedPagerankVector, int vectorSize, double dampingFactor, pthread_t *threads, int threadNum) { + + pthread_attr_t attr; + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); + + // Calculates the web uniform probability once. + double webUniformProbability = 1. / vectorSize; + int runningThreads = 0; + + for (int i=0; ipagerankVector[co->position] = co->dF * co->pagerankVector[co->position]; + + + double normDifference = vectorNorm(co->previousPagerankVector, co->numberOfPages) - + vectorNorm(co->pagerankVector, co->numberOfPages); + + + co->pagerankVector[co->position] += normDifference * co->webUniformProbability + + co->linksFromConvergedPagesPagerankVector[co->position] + co->convergedPagerankVector[co->position]; + +} + +void csrSparseMatrixVectorMultiplication_threads(void* arg){ + threadArgs* co = (threadArgs *)arg; + //(CsrSparseMatrix sparseMatrix, + //double *vector, double **product, int vectorSize) { + // Initializes the elements of the product vector to zero + //for (int i=0; ipagerankVector[co->position] = 0; + //} + + //for (int i=0; itransitionMatrix[co->position].rowCumulativeIndexes[co->position], + endIndex = co->transitionMatrix[co->position].rowCumulativeIndexes[co->position+1]; + + if (startIndex == endIndex) { + // This row has no elements + return; + } + + double sum = 0; + for(int j=startIndex; jtransitionMatrix[co->position].columnIndexes[j]; + sum += co->transitionMatrix[co->position].values[j] * co->previousPagerankVector[elementColumn]; + } + + co->pagerankVector[co->position] = sum; + //} +} + + +/* + * vectorNorm calculates the first norm of a vector. +*/ +double vectorNorm(double *vector, int vectorSize) { + double norm = 0.; + + for (int i=0; i 10) { + validUsage(argumentVector[0]); + } + + (*parameters).numberOfPages = 0; + (*parameters).maxIterations = 0; + (*parameters).convergenceCriterion = 1; + (*parameters).dampingFactor = 0.85; + (*parameters).verbose = false; + (*parameters).history = false; + (*parameters).outputFilename = DEFAULT_OUTPUT_FILENAME; + + char *endPointer; + int argumentIndex = 1; + + while (argumentIndex < argumentCount) { + if (!strcmp(argumentVector[argumentIndex], ARGUMENT_CONVERGENCE_TOLERANCE)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + double convergenceInput = strtod(argumentVector[argumentIndex], &endPointer); + if (convergenceInput == 0) { + printf("Invalid convergence argument\n"); + exit(EXIT_FAILURE); + } + (*parameters).convergenceCriterion = convergenceInput; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_MAX_ITERATIONS)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + size_t iterationsInput = strtol(argumentVector[argumentIndex], &endPointer, NUMERICAL_BASE); + if (iterationsInput == 0 && endPointer) { + printf("Invalid iterations argument\n"); + exit(EXIT_FAILURE); + } + (*parameters).maxIterations = iterationsInput; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_DAMPING_FACTOR)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + double alphaInput = strtod(argumentVector[argumentIndex], &endPointer); + if ((alphaInput == 0 || alphaInput > 1) && endPointer) { + printf("Invalid alpha argument\n"); + exit(EXIT_FAILURE); + } + (*parameters).dampingFactor = alphaInput; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_VERBAL_OUTPUT)) { + (*parameters).verbose = true; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_OUTPUT_HISTORY)) { + (*parameters).history = true; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_OUTPUT_FILENAME)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + if (fopen(argumentVector[argumentIndex], "w") == NULL) { + printf("Invalid output filename. Reverting to default.\n"); + continue; + } + (*parameters).outputFilename = argumentVector[argumentIndex]; + } else if (argumentIndex == argumentCount - 1) { + (*parameters).graphFilename = argumentVector[argumentIndex]; + } else { + validUsage(argumentVector[0]); + exit(EXIT_FAILURE); + } + ++argumentIndex; + } +} + +/* + * readGraphFromFile loads the file supplied in the command line arguments to an + * array (directedWebGraph) that represents the graph. +*/ +void generateNormalizedTransitionMatrixFromFile(CsrSparseMatrix *transitionMatrix, + Parameters *parameters){ + FILE *graphFile; + + // Opens the file for reading + graphFile = fopen((*parameters).graphFilename, "r+"); + if (!graphFile) { + printf("Error opening file \n"); + exit(EXIT_FAILURE); + } + + char buffer[FILE_READ_BUFFER_SIZE]; + char *readResult; + // Skips the first two lines + readResult = fgets(buffer, FILE_READ_BUFFER_SIZE, graphFile); + readResult = fgets(buffer, FILE_READ_BUFFER_SIZE, graphFile); + if (readResult == NULL) { + printf("Error while reading from the file. Does the file have the correct format?\n"); + exit(EXIT_FAILURE); + } + + // Third line contains the numbers of nodes and edges + int numberOfNodes = 0, numberOfEdges = 0; + + readResult = fgets(buffer, FILE_READ_BUFFER_SIZE, graphFile); + if (readResult == NULL) { + printf("Error while reading from the file. Does the file have the correct format?\n"); + exit(EXIT_FAILURE); + } + + // Parses the number of nodes and number of edges + { + // Splits string to whitespace + char *token = strtok(buffer, " "); + bool nextIsNodes = false, nextIsEdges = false; + + while (token != NULL) { + if (strcmp(token, "Nodes:") == 0) { + nextIsNodes = true; + } else if (nextIsNodes) { + numberOfNodes = atoi(token); + nextIsNodes = false; + } else if (strcmp(token, "Edges:") == 0) { + nextIsEdges = true; + } else if (nextIsEdges) { + numberOfEdges = atoi(token); + break; + } + + // Gets next string token + token = strtok (NULL, " ,.-"); + } + } + + if ((*parameters).verbose) { + printf("File claims number of pages is: %d\nThe number of edges is: %d\n", + numberOfNodes, numberOfEdges); + } + + // Skips the fourth line + readResult = fgets(buffer, 512, graphFile); + if (readResult == NULL) { + printf("Error while reading from the file. Does the file have the correct format?\n"); + exit(EXIT_FAILURE); + } + + + int maxPageIndex = 0; + CooSparseMatrix tempMatrix = initCooSparseMatrix(); + allocMemoryForCoo(&tempMatrix, numberOfEdges); + + for (int i=0; i maxPageIndex) { + maxPageIndex = fileFrom; + } + if (fileTo > maxPageIndex) { + maxPageIndex = fileTo; + } + addElement(&tempMatrix, 1, fileFrom, fileTo); + } + + if ((*parameters).verbose) { + printf("Max page index found is: %d\n", maxPageIndex); + } + (*parameters).numberOfPages = maxPageIndex + 1; + + // Calculates the outdegree of each page and assigns the uniform probability + // of transition to the elements of the corresponding row + int* pageOutdegree = malloc((*parameters).numberOfPages*sizeof(int)); + for (int i=0; i<(*parameters).numberOfPages; ++i){ + pageOutdegree[i] = 0; + } + + for (int i=0; irowIndex; + ++pageOutdegree[currentRow]; + } + + for (int i=0; ivalue = 1./pageOutdegree[tempMatrix.elements[i]->rowIndex]; + } + free(pageOutdegree); + + // Transposes the temporary transition matrix (P^T). + transposeSparseMatrix(&tempMatrix); + + allocMemoryForCsr(transitionMatrix, numberOfEdges); + // Transforms the temporary COO matrix to the desired CSR format + transformToCSR(tempMatrix, transitionMatrix); + destroyCooSparseMatrix(&tempMatrix); + + fclose(graphFile); +} + +/* + * validUsage outputs a message to the console that informs the user of the + * correct (valid) way to use the program. +*/ +void validUsage(char *programName) { + printf("%s [-c convergence_criterion] [-m max_iterations] [-a alpha] [-v] [-h] [-o output_filename] " \ + "\n-c convergence_criterion" \ + "\n\tthe convergence tolerance criterion" \ + "\n-m max_iterations" \ + "\n\tmaximum number of iterations to perform" \ + "\n-a alpha" \ + "\n\tthe damping factor" \ + "\n-v enable verbal output" \ + "\n-h enable history output to file" \ + "\n-o output_filename" \ + "\n\tfilename and path for the output" \ + "\n", programName); + exit(EXIT_FAILURE); +} + +/* + * checkIncrement is a helper function for parseArguments function. +*/ +int checkIncrement(int previousIndex, int maxIndex, char *programName) { + if (previousIndex == maxIndex) { + validUsage(programName); + exit(EXIT_FAILURE); + } + return ++previousIndex; +} + +void savePagerankToFile(char *filename, bool append, double *pagerankVector, + int vectorSize, int iteration) { + FILE *outputFile; + + if (append) { + outputFile = fopen(filename, "a"); + } else { + outputFile = fopen(filename, "w"); + } + + if (outputFile == NULL) { + printf("Error while opening the output file.\n"); + return; + } + + // Saves the pagerank vector + //fprintf(outputFile, "Iteration %d:\t", iteration); + double sum = 0; + for (int i=0; i +#include +#include +#include +#include + +#include "coo_sparse_matrix.h" + +/* ===== DEFINITIONS ===== */ + +//Colors used for better console output formating. +#define ANSI_COLOR_RED "\x1B[31m" +#define ANSI_COLOR_GREEN "\x1B[32m" +#define ANSI_COLOR_YELLOW "\x1B[33m" +#define ANSI_COLOR_BLUE "\x1B[34m" +#define ANSI_COLOR_CYAN "\x1B[36m" +#define ANSI_COLOR_RESET "\x1B[0m" + +/* ===== CONSTANTS DEFINITION ===== */ + +// Constant strings that store the command line options available. +extern const char *ARGUMENT_CONVERGENCE_TOLERANCE; +extern const char *ARGUMENT_MAX_ITERATIONS; +extern const char *ARGUMENT_DAMPING_FACTOR; +extern const char *ARGUMENT_VERBAL_OUTPUT; +extern const char *ARGUMENT_OUTPUT_HISTORY; +extern const char *ARGUMENT_OUTPUT_FILENAME; +// The numerical base used when parsing numerical command line arguments. +extern const int NUMERICAL_BASE; +// Default filename used for the output. +extern char *DEFAULT_OUTPUT_FILENAME; +// The size of the buffer used for reading the graph input file. +extern const int FILE_READ_BUFFER_SIZE; + +/* ===== STRUCTURES ===== */ + +// A data structure to conveniently hold the algorithm's parameters. +typedef struct parameters { + int numberOfPages, maxIterations; + double convergenceCriterion, dampingFactor; + bool verbose, history; + char *outputFilename, *graphFilename; +} Parameters; + +/* ===== FUNCTION DEFINITIONS ===== */ + +// Function validUsage outputs the correct way to use the program with command +// line arguments. +void validUsage(char *programName); + +// Function checkIncrement is a helper function used in parseArguments (see +// bellow). +int checkIncrement(int previousIndex, int maxIndex, char *programName); + +// Function parseArguments parses command line arguments. +void parseArguments(int argumentCount, char **argumentVector, + Parameters *parameters); + +// Function generateNormalizedTransitionMatrixFromFile reads through the entries +// of the file specified in the arguments (parameters->graphFilename), using +// them to populate the sparse array (transitionMatrix). The entries of the file +// represent the edges of the web transition graph. The entries are then +// modified to become the rows of the transition matrix. +void generateNormalizedTransitionMatrixFromFile(CsrSparseMatrix *transitionMatrix, + Parameters *parameters); + +// Function savePagerankToFile appends or overwrites the pagerank vector +// "pagerankVector" to the file with the filename supplied in the arguments. +void savePagerankToFile(char *filename, bool append, double *pagerankVector, + int vectorSize, int iteration); + +// Function initialize allocates memory for the pagerank vector, reads the +// dataset from the file and creates the transition probability distribution +// matrix. +void initialize(CsrSparseMatrix *transitionMatrix, double **pagerankVector, + Parameters *parameters); + +// Function vectorNorm calculates the first norm of a vector. +double vectorNorm(double *vector, int vectorSize); + +// Function calculateNextPagerank calculates the next pagerank vector. +void calculateNextPagerank(CsrSparseMatrix *transitionMatrix, + double *previousPagerankVector, double **pagerankVector, + double *linksFromConvergedPagesPagerankVector, + double *convergedPagerankVector, int vectorSize, double dampingFactor, pthread_t *threads, int threadNum); + +// Function pagerank iteratively calculates the pagerank of each page until +// either the convergence criterion is met or the maximum number of iterations +// is reached. +int pagerank(CsrSparseMatrix *transitionMatrix, double **pagerankVector, + bool *convergenceStatus, Parameters parameters); + +void csrSparseMatrixVectorMultiplication_threads(void* arg); +void compPagerankVector_threads(void* arg); +#endif // SERIAL_GS_PAGERANK_FUNCTIONS_H \ No newline at end of file