diff --git a/openmp/Makefile b/openmp/Makefile new file mode 100644 index 0000000..2e297ea --- /dev/null +++ b/openmp/Makefile @@ -0,0 +1,37 @@ +SHELL := /bin/bash + +# ============================================ +# COMMANDS + +CC = gcc -std=gnu99 -fopenmp +RM = rm -f +CFLAGS_DEBUG=-O0 -ggdb3 -Wall -I. +CFLAGS=-O3 -Wall -I. +OBJ=serial_gs_pagerank.o serial_gs_pagerank_functions.o coo_sparse_matrix.o csr_sparse_matrix.o +DEPS=serial_gs_pagerank_functions.h coo_sparse_matrix.h csr_sparse_matrix.h + +# ========================================== +# TARGETS + +EXECUTABLES = pagerank.out + +.PHONY: all clean + +all: $(EXECUTABLES) + +# ========================================== +# DEPENDENCIES (HEADERS) + +%.o: %.c $(DEPS) + $(CC) -c -o $@ $< $(CFLAGS) + +.PRECIOUS: $(EXECUTABLES) $(OBJ) + +# ========================================== +# EXECUTABLE (MAIN) + +$(EXECUTABLES): $(OBJ) + $(CC) -o $@ $^ $(CFLAGS) + +clean: + $(RM) *.o *~ $(EXECUTABLES) diff --git a/openmp/coo_sparse_matrix.c b/openmp/coo_sparse_matrix.c new file mode 100644 index 0000000..93d0ac9 --- /dev/null +++ b/openmp/coo_sparse_matrix.c @@ -0,0 +1,125 @@ +#include "coo_sparse_matrix.h" + +CooSparseMatrix initCooSparseMatrix() { + CooSparseMatrix sparseMatrix; + sparseMatrix.size = 0; + sparseMatrix.numberOfNonZeroElements = 0; + sparseMatrix.elements = NULL; + return sparseMatrix; +} + +void allocMemoryForCoo(CooSparseMatrix *sparseMatrix, int numberOfElements) { + sparseMatrix->elements = (CooSparseMatrixElement **) malloc( + numberOfElements * sizeof(CooSparseMatrixElement *)); + sparseMatrix->size = numberOfElements; +} + +void addElement(CooSparseMatrix *sparseMatrix, double value, int row, int column) { + if (sparseMatrix->numberOfNonZeroElements == sparseMatrix->size) { + printf("%d == %d |||| %d, %d\n", sparseMatrix->numberOfNonZeroElements, + sparseMatrix->size, row, column); + printf("Number of non zero elements exceeded size of matrix!\n"); + exit(EXIT_FAILURE); + } + + // Creates the new element + CooSparseMatrixElement *newElement = (CooSparseMatrixElement *) malloc( + sizeof(CooSparseMatrixElement)); + newElement->value = value; + newElement->rowIndex = row; + newElement->columnIndex = column; + + sparseMatrix->elements[sparseMatrix->numberOfNonZeroElements] = newElement; + sparseMatrix->numberOfNonZeroElements = sparseMatrix->numberOfNonZeroElements + 1; +} + +void transposeSparseMatrix(CooSparseMatrix *sparseMatrix) { + for (int i=0; inumberOfNonZeroElements; ++i) { + CooSparseMatrixElement *element = sparseMatrix->elements[i]; + int tempRow = element->rowIndex; + element->rowIndex = element->columnIndex; + element->columnIndex = tempRow; + } +} + +void transformToCSR(CooSparseMatrix initialSparseMatrix, + CsrSparseMatrix *transformedSparseMatrix) { + // Taken from here: https://github.com/scipy/scipy/blob/3b36a57/scipy/sparse/sparsetools/coo.h#L34 + if (initialSparseMatrix.numberOfNonZeroElements > transformedSparseMatrix->size) { + printf("Transformed CSR matrix does not have enough space!\n"); + exit(EXIT_FAILURE); + } + + for (int i=0; irowIndex; + transformedSparseMatrix->rowCumulativeIndexes[rowIndex] = + transformedSparseMatrix->rowCumulativeIndexes[rowIndex] + 1; + } + + // Cumulative sums the non zero elements per row + for (int i=0, sum=0; isize+1; ++i){ + int temp = transformedSparseMatrix->rowCumulativeIndexes[i]; + transformedSparseMatrix->rowCumulativeIndexes[i] = sum; + sum += temp; + } + + for (int i=0; irowIndex; + int destinationIndex = transformedSparseMatrix->rowCumulativeIndexes[row]; + + transformedSparseMatrix->columnIndexes[destinationIndex] = initialSparseMatrix.elements[i]->columnIndex; + transformedSparseMatrix->values[destinationIndex] = initialSparseMatrix.elements[i]->value; + + transformedSparseMatrix->rowCumulativeIndexes[row]++; + } + + for (int i=0, last=0; i<=transformedSparseMatrix->size; i++){ + int temp = transformedSparseMatrix->rowCumulativeIndexes[i]; + transformedSparseMatrix->rowCumulativeIndexes[i] = last; + last = temp; + } + + transformedSparseMatrix->numberOfNonZeroElements = initialSparseMatrix.numberOfNonZeroElements; +} + +void cooSparseMatrixVectorMultiplication(CooSparseMatrix sparseMatrix, + double *vector, double **product, int vectorSize) { + // Initializes the elements of the product vector to zero + for (int i=0; irowIndex, column = element->columnIndex; + + if (row >= vectorSize) { + printf("Error at sparseMatrixVectorMultiplication. Matrix has more rows than vector!\n"); + printf("row = %d\n", row); + exit(EXIT_FAILURE); + } + + (*product)[row] = (*product)[row] + element->value * vector[column]; + } +} + +void destroyCooSparseMatrix(CooSparseMatrix *sparseMatrix) { + for (int i=0; inumberOfNonZeroElements; ++i) { + free(sparseMatrix->elements[i]); + } + free(sparseMatrix->elements); +} + +void printCooSparseMatrix(CooSparseMatrix sparseMatrix) { + if (sparseMatrix.numberOfNonZeroElements == 0) { + return; + } + + CooSparseMatrixElement *element; + for (int i=0; irowIndex, element->columnIndex, + element->value); + } +} \ No newline at end of file diff --git a/openmp/coo_sparse_matrix.h b/openmp/coo_sparse_matrix.h new file mode 100644 index 0000000..dd4c31d --- /dev/null +++ b/openmp/coo_sparse_matrix.h @@ -0,0 +1,33 @@ +#ifndef COO_SPARSE_MATRIX_H /* Include guard */ +#define COO_SPARSE_MATRIX_H + +#include +#include +#include +#include + +#include "csr_sparse_matrix.h" + +typedef struct cooSparseMatrixElement { + double value; + int rowIndex, columnIndex; +} CooSparseMatrixElement; + +typedef struct cooSparseMatrix { + int size, numberOfNonZeroElements; + CooSparseMatrixElement **elements; +} CooSparseMatrix; + +CooSparseMatrix initCooSparseMatrix(); +void allocMemoryForCoo(CooSparseMatrix *sparseMatrix, int numberOfElements); +void addElement(CooSparseMatrix *sparseMatrix, double value, int row, + int column); +void transposeSparseMatrix(CooSparseMatrix *sparseMatrix); +void transformToCSR(CooSparseMatrix initialSparseMatrix, + CsrSparseMatrix *transformedSparseMatrix); +void cooSparseMatrixVectorMultiplication(CooSparseMatrix sparseMatrix, + double *vector, double **product, int vectorSize); +void destroyCooSparseMatrix(CooSparseMatrix *sparseMatrix); +void printCooSparseMatrix(CooSparseMatrix sparseMatrix); + +#endif // COO_SPARSE_MATRIX_H \ No newline at end of file diff --git a/openmp/coo_sparse_matrix.o b/openmp/coo_sparse_matrix.o new file mode 100644 index 0000000..8b80d4e Binary files /dev/null and b/openmp/coo_sparse_matrix.o differ diff --git a/openmp/csr_sparse_matrix.c b/openmp/csr_sparse_matrix.c new file mode 100644 index 0000000..bdf8413 --- /dev/null +++ b/openmp/csr_sparse_matrix.c @@ -0,0 +1,93 @@ +#include "csr_sparse_matrix.h" + +CsrSparseMatrix initCsrSparseMatrix() { + CsrSparseMatrix sparseMatrix; + sparseMatrix.size = 0; + sparseMatrix.numberOfNonZeroElements = 0; + + sparseMatrix.values = NULL; + sparseMatrix.columnIndexes = NULL; + sparseMatrix.rowCumulativeIndexes = NULL; + return sparseMatrix; +} + +void allocMemoryForCsr(CsrSparseMatrix *sparseMatrix, int numberOfElements) { + sparseMatrix->values = (double *) malloc(numberOfElements * sizeof(double)); + sparseMatrix->columnIndexes = (int *) malloc( + numberOfElements * sizeof(int)); + sparseMatrix->rowCumulativeIndexes = (int *) malloc( + (numberOfElements + 1) * sizeof(int)); + + for (int i=0; irowCumulativeIndexes[i] = 0; + } + sparseMatrix->size = numberOfElements; +} + +// Row indexes start from 0! +void zeroOutRow(CsrSparseMatrix *sparseMatrix, int row) { + int startIndex = sparseMatrix->rowCumulativeIndexes[row], + endIndex = sparseMatrix->rowCumulativeIndexes[row+1]; + for (int i=startIndex; ivalues[i] = 0; + } +} + +void zeroOutColumn(CsrSparseMatrix *sparseMatrix, int column) { + for (int i=0; inumberOfNonZeroElements; ++i){ + if(sparseMatrix->columnIndexes[i] == column){ + // Zeros out this element + sparseMatrix->values[i] = 0; + } + } +} + +void csrSparseMatrixVectorMultiplication(CsrSparseMatrix sparseMatrix, + double *vector, double **product, int vectorSize) { + // Initializes the elements of the product vector to zero + for (int i=0; ivalues); + free(sparseMatrix->rowCumulativeIndexes); + free(sparseMatrix->columnIndexes); +} + +void printCsrSparseMatrix(CsrSparseMatrix sparseMatrix) { + if (sparseMatrix.size == 0) { + return; + } + + for (int i=0; i +#include +#include +#include + +typedef struct csrSparseMatrix { + int size, numberOfNonZeroElements; + int *rowCumulativeIndexes, *columnIndexes; + double *values; +} CsrSparseMatrix; + +CsrSparseMatrix initCsrSparseMatrix(); +void allocMemoryForCsr(CsrSparseMatrix *sparseMatrix, int numberOfElements); +void zeroOutRow(CsrSparseMatrix *sparseMatrix, int row); +void zeroOutColumn(CsrSparseMatrix *sparseMatrix, int column); +void csrSparseMatrixVectorMultiplication(CsrSparseMatrix sparseMatrix, + double *vector, double **product, int vectorSize); +void destroyCsrSparseMatrix(CsrSparseMatrix *sparseMatrix); +void printCsrSparseMatrix(CsrSparseMatrix sparseMatrix); + +#endif // CSR_SPARSE_MATRIX_H \ No newline at end of file diff --git a/openmp/csr_sparse_matrix.o b/openmp/csr_sparse_matrix.o new file mode 100644 index 0000000..c5872b4 Binary files /dev/null and b/openmp/csr_sparse_matrix.o differ diff --git a/openmp/pagerank.out b/openmp/pagerank.out new file mode 100644 index 0000000..37ff79d Binary files /dev/null and b/openmp/pagerank.out differ diff --git a/openmp/serial_gs_pagerank.c b/openmp/serial_gs_pagerank.c new file mode 100644 index 0000000..3836cf8 --- /dev/null +++ b/openmp/serial_gs_pagerank.c @@ -0,0 +1,44 @@ +#include +#include +#include "serial_gs_pagerank_functions.h" +//#include "coo_sparse_matrix.h" + +struct timeval startwtime, endwtime; +double seq_time; + +int main(int argc, char **argv) { + CsrSparseMatrix transitionMatrix = initCsrSparseMatrix(); + double *pagerankVector; + bool convergenceStatus; + Parameters parameters; + omp_set_dynamic(0); + parseArguments(argc, argv, ¶meters); + + initialize(&transitionMatrix, &pagerankVector, ¶meters); + + // Starts wall-clock timer + gettimeofday (&startwtime, NULL); + + int iterations = pagerank(&transitionMatrix, &pagerankVector, + &convergenceStatus, parameters); + if (parameters.verbose) { + printf(ANSI_COLOR_YELLOW "\n----- RESULTS -----\n" ANSI_COLOR_RESET); + if (convergenceStatus) { + printf(ANSI_COLOR_GREEN "Pagerank converged after %d iterations!\n" \ + ANSI_COLOR_RESET, iterations); + } else { + printf(ANSI_COLOR_RED "Pagerank did not converge after max number of" \ + " iterations (%d) was reached!\n" ANSI_COLOR_RESET, iterations); + } + } + + // Stops wall-clock timer + gettimeofday (&endwtime, NULL); + double seq_time = (double)((endwtime.tv_usec - startwtime.tv_usec)/1.0e6 + + endwtime.tv_sec - startwtime.tv_sec); + printf("%s wall clock time = %f\n","Pagerank (Gauss-Seidel method), serial implementation", + seq_time); + + free(pagerankVector); + destroyCsrSparseMatrix(&transitionMatrix); +} diff --git a/openmp/serial_gs_pagerank.o b/openmp/serial_gs_pagerank.o new file mode 100644 index 0000000..366656b Binary files /dev/null and b/openmp/serial_gs_pagerank.o differ diff --git a/openmp/serial_gs_pagerank_functions.c b/openmp/serial_gs_pagerank_functions.c new file mode 100644 index 0000000..cb406c1 --- /dev/null +++ b/openmp/serial_gs_pagerank_functions.c @@ -0,0 +1,512 @@ +/* ===== INCLUDES ===== */ + +#include "serial_gs_pagerank_functions.h" +#include +/* ===== CONSTANTS ===== */ + +const char *ARGUMENT_CONVERGENCE_TOLERANCE = "-c"; +const char *ARGUMENT_MAX_ITERATIONS = "-m"; +const char *ARGUMENT_DAMPING_FACTOR = "-a"; +const char *ARGUMENT_VERBAL_OUTPUT = "-v"; +const char *ARGUMENT_OUTPUT_HISTORY = "-h"; +const char *ARGUMENT_OUTPUT_FILENAME = "-o"; + +const int NUMERICAL_BASE = 10; +char *DEFAULT_OUTPUT_FILENAME = "pagerank_output"; +const int FILE_READ_BUFFER_SIZE = 4096; + +const int CONVERGENCE_CHECK_ITERATION_PERIOD = 3; +const int SPARSITY_INCREASE_ITERATION_PERIOD = 3; + +/* ===== FUNCTIONS ===== */ + +int pagerank(CsrSparseMatrix *transitionMatrix, double **pagerankVector, + bool *convergenceStatus, Parameters parameters) { + // Variables declaration + int iterations = 0, numberOfPages = parameters.numberOfPages; + double delta, *pagerankDifference, *previousPagerankVector, + *convergedPagerankVector, *linksFromConvergedPagesPagerankVector; + CooSparseMatrix linksFromConvergedPages = initCooSparseMatrix(); + bool *convergenceMatrix; + + int P = omp_get_max_threads(); + omp_set_num_threads(P); + + // Space allocation + { + size_t sizeofDouble = sizeof(double); + // pagerankDifference used to calculate delta + pagerankDifference = (double *) malloc(numberOfPages * sizeofDouble); + // previousPagerankVector holds last iteration's pagerank vector + previousPagerankVector = (double *) malloc(numberOfPages * sizeofDouble); + // convergedPagerankVector is the pagerank vector of converged pages only + convergedPagerankVector = (double *) malloc(numberOfPages * sizeofDouble); + // linksFromConvergedPagesPagerankVector holds the partial sum of the + // pagerank vector, that describes effect of the links from converged + // pages to non converged pages + linksFromConvergedPagesPagerankVector = (double *) malloc(numberOfPages * sizeofDouble); + // convergenceMatrix indicates which pages have converged + convergenceMatrix = (bool *) malloc(numberOfPages * sizeof(bool)); + *convergenceStatus = false; + + // Initialization + allocMemoryForCoo(&linksFromConvergedPages, transitionMatrix->numberOfNonZeroElements); + #pragma omp parallel for num_threads(P) + for (int i=0; irowCumulativeIndexes[i], + rowEndIndex = transitionMatrix->rowCumulativeIndexes[i+1]; + if (rowEndIndex > rowStartIndex) { + // This row (page) has non zero elements (out-links) + for (int j=rowStartIndex; jcolumnIndexes[j]; + if (convergenceMatrix[pageLinksTo] == false){ + // Link exists, adds element to the vector + addElement(&linksFromConvergedPages, + transitionMatrix->values[j], i, pageLinksTo); + } + } + } + + // Increases sparsity of the transition matrix by + // deleting elements that correspond to converged pages + zeroOutRow(transitionMatrix, i); + zeroOutColumn(transitionMatrix, i); + + // Builds the new linksFromConvergedPagesPagerankVector + cooSparseMatrixVectorMultiplication(linksFromConvergedPages, + *pagerankVector, &linksFromConvergedPagesPagerankVector, + numberOfPages); + } + } + free(newlyConvergedPages); + } + + ++iterations; + // Outputs information about this iteration + if (iterations%2) { + printf(ANSI_COLOR_BLUE "Iteration %d: delta = %f\n" ANSI_COLOR_RESET, iterations, delta); + } else { + printf(ANSI_COLOR_CYAN "Iteration %d: delta = %f\n" ANSI_COLOR_RESET, iterations, delta); + } + } while (!*convergenceStatus && (parameters.maxIterations == 0 || + iterations < parameters.maxIterations)); + parameters.realIterations = iterations; + if (!parameters.history) { + // Outputs last pagerank vector to file + savePagerankToFile(parameters.outputFilename, false, *pagerankVector, + numberOfPages, parameters.realIterations); + } + + // Frees memory + free(pagerankDifference); + free(previousPagerankVector); + free(convergedPagerankVector); + free(linksFromConvergedPagesPagerankVector); + free(convergenceMatrix); + destroyCooSparseMatrix(&linksFromConvergedPages); + + return iterations; +} + +/* + * initialize allocates required memory for arrays, reads the web graph from the + * from the file and creates the initial transition probability distribution + * matrix. +*/ +void initialize(CsrSparseMatrix *transitionMatrix, + double **pagerankVector, Parameters *parameters) { + + // Reads web graph from file + if ((*parameters).verbose) { + printf(ANSI_COLOR_YELLOW "----- Reading graph from file -----\n" ANSI_COLOR_RESET); + } + generateNormalizedTransitionMatrixFromFile(transitionMatrix, parameters); + + // Outputs the algorithm parameters to the console + if ((*parameters).verbose) { + printf(ANSI_COLOR_YELLOW "\n----- Running with parameters -----\n" ANSI_COLOR_RESET\ + "Number of pages: %d", (*parameters).numberOfPages); + if (!(*parameters).maxIterations) { + printf("\nMaximum number of iterations: inf"); + } else { + printf("\nMaximum number of iterations: %d", (*parameters).maxIterations); + } + printf("\nConvergence criterion: %f" \ + "\nDamping factor: %f" \ + "\nGraph filename: %s\n", (*parameters).convergenceCriterion, + (*parameters).dampingFactor, (*parameters).graphFilename); + } + (*parameters).realIterations = 0; + // Allocates memory for the pagerank vector + (*pagerankVector) = (double *) malloc((*parameters).numberOfPages * sizeof(double)); + double webUniformProbability = 1. / (*parameters).numberOfPages; + for (int i=0; i<(*parameters).numberOfPages; ++i) { + (*pagerankVector)[i] = webUniformProbability; + } +} + +// ==================== MATH UTILS ==================== + +/* + * calculateNextPagerank calculates the product of the multiplication + * between a matrix and the a vector in a cheap way. +*/ +void calculateNextPagerank(CsrSparseMatrix *transitionMatrix, + double *previousPagerankVector, double **pagerankVector, + double *linksFromConvergedPagesPagerankVector, + double *convergedPagerankVector, int vectorSize, double dampingFactor) { + // Calculates the web uniform probability once. + + + double webUniformProbability = 1. / vectorSize; + + csrSparseMatrixVectorMultiplication(*transitionMatrix, previousPagerankVector, + pagerankVector, vectorSize); + #pragma omp parallel for + for (int i=0; i 10) { + validUsage(argumentVector[0]); + } + + (*parameters).numberOfPages = 0; + (*parameters).maxIterations = 0; + (*parameters).convergenceCriterion = 1; + (*parameters).dampingFactor = 0.85; + (*parameters).verbose = false; + (*parameters).history = false; + (*parameters).outputFilename = DEFAULT_OUTPUT_FILENAME; + + char *endPointer; + int argumentIndex = 1; + + while (argumentIndex < argumentCount) { + if (!strcmp(argumentVector[argumentIndex], ARGUMENT_CONVERGENCE_TOLERANCE)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + double convergenceInput = strtod(argumentVector[argumentIndex], &endPointer); + if (convergenceInput == 0) { + printf("Invalid convergence argument\n"); + exit(EXIT_FAILURE); + } + (*parameters).convergenceCriterion = convergenceInput; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_MAX_ITERATIONS)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + size_t iterationsInput = strtol(argumentVector[argumentIndex], &endPointer, NUMERICAL_BASE); + if (iterationsInput == 0 && endPointer) { + printf("Invalid iterations argument\n"); + exit(EXIT_FAILURE); + } + (*parameters).maxIterations = iterationsInput; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_DAMPING_FACTOR)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + double alphaInput = strtod(argumentVector[argumentIndex], &endPointer); + if ((alphaInput == 0 || alphaInput > 1) && endPointer) { + printf("Invalid alpha argument\n"); + exit(EXIT_FAILURE); + } + (*parameters).dampingFactor = alphaInput; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_VERBAL_OUTPUT)) { + (*parameters).verbose = true; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_OUTPUT_HISTORY)) { + (*parameters).history = true; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_OUTPUT_FILENAME)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + if (fopen(argumentVector[argumentIndex], "w") == NULL) { + printf("Invalid output filename. Reverting to default.\n"); + continue; + } + (*parameters).outputFilename = argumentVector[argumentIndex]; + } else if (argumentIndex == argumentCount - 1) { + (*parameters).graphFilename = argumentVector[argumentIndex]; + } else { + validUsage(argumentVector[0]); + exit(EXIT_FAILURE); + } + ++argumentIndex; + } +} + +/* + * readGraphFromFile loads the file supplied in the command line arguments to an + * array (directedWebGraph) that represents the graph. +*/ +void generateNormalizedTransitionMatrixFromFile(CsrSparseMatrix *transitionMatrix, + Parameters *parameters){ + FILE *graphFile; + + // Opens the file for reading + graphFile = fopen((*parameters).graphFilename, "r+"); + if (!graphFile) { + printf("Error opening file \n"); + exit(EXIT_FAILURE); + } + + char buffer[FILE_READ_BUFFER_SIZE]; + char *readResult; + // Skips the first two lines + readResult = fgets(buffer, FILE_READ_BUFFER_SIZE, graphFile); + readResult = fgets(buffer, FILE_READ_BUFFER_SIZE, graphFile); + if (readResult == NULL) { + printf("Error while reading from the file. Does the file have the correct format?\n"); + exit(EXIT_FAILURE); + } + + // Third line contains the numbers of nodes and edges + int numberOfNodes = 0, numberOfEdges = 0; + + readResult = fgets(buffer, FILE_READ_BUFFER_SIZE, graphFile); + if (readResult == NULL) { + printf("Error while reading from the file. Does the file have the correct format?\n"); + exit(EXIT_FAILURE); + } + + // Parses the number of nodes and number of edges + { + // Splits string to whitespace + char *token = strtok(buffer, " "); + bool nextIsNodes = false, nextIsEdges = false; + + while (token != NULL) { + if (strcmp(token, "Nodes:") == 0) { + nextIsNodes = true; + } else if (nextIsNodes) { + numberOfNodes = atoi(token); + nextIsNodes = false; + } else if (strcmp(token, "Edges:") == 0) { + nextIsEdges = true; + } else if (nextIsEdges) { + numberOfEdges = atoi(token); + break; + } + + // Gets next string token + token = strtok (NULL, " ,.-"); + } + } + + if ((*parameters).verbose) { + printf("File claims number of pages is: %d\nThe number of edges is: %d\n", + numberOfNodes, numberOfEdges); + } + + // Skips the fourth line + readResult = fgets(buffer, 512, graphFile); + if (readResult == NULL) { + printf("Error while reading from the file. Does the file have the correct format?\n"); + exit(EXIT_FAILURE); + } + + + int maxPageIndex = 0; + CooSparseMatrix tempMatrix = initCooSparseMatrix(); + allocMemoryForCoo(&tempMatrix, numberOfEdges); + + for (int i=0; i maxPageIndex) { + maxPageIndex = fileFrom; + } + if (fileTo > maxPageIndex) { + maxPageIndex = fileTo; + } + addElement(&tempMatrix, 1, fileFrom, fileTo); + } + + if ((*parameters).verbose) { + printf("Max page index found is: %d\n", maxPageIndex); + } + (*parameters).numberOfPages = maxPageIndex + 1; + + // Calculates the outdegree of each page and assigns the uniform probability + // of transition to the elements of the corresponding row + + int* pageOutdegree = malloc((*parameters).numberOfPages*sizeof(int)); + for (int i=0; i<(*parameters).numberOfPages; ++i){ + pageOutdegree[i] = 0; + } + + + for (int i=0; irowIndex; + + if (currentRow == tempMatrix.elements[i]->rowIndex) { + ++pageOutdegree[currentRow]; + } + + + } + + for (int i=0; ivalue = 1./pageOutdegree[tempMatrix.elements[i]->rowIndex]; + } + + // Transposes the temporary transition matrix (P^T). + transposeSparseMatrix(&tempMatrix); + allocMemoryForCsr(transitionMatrix, numberOfEdges); + // Transforms the temporary COO matrix to the desired CSR format + transformToCSR(tempMatrix, transitionMatrix); + //printCsrSparseMatrix(*transitionMatrix); + destroyCooSparseMatrix(&tempMatrix); + + fclose(graphFile); +} + +/* + * validUsage outputs a message to the console that informs the user of the + * correct (valid) way to use the program. +*/ +void validUsage(char *programName) { + printf("%s [-c convergence_criterion] [-m max_iterations] [-a alpha] [-v] [-h] [-o output_filename] " \ + "\n-c convergence_criterion" \ + "\n\tthe convergence tolerance criterion" \ + "\n-m max_iterations" \ + "\n\tmaximum number of iterations to perform" \ + "\n-a alpha" \ + "\n\tthe damping factor" \ + "\n-v enable verbal output" \ + "\n-h enable history output to file" \ + "\n-o output_filename" \ + "\n\tfilename and path for the output" \ + "\n", programName); + exit(EXIT_FAILURE); +} + +/* + * checkIncrement is a helper function for parseArguments function. +*/ +int checkIncrement(int previousIndex, int maxIndex, char *programName) { + if (previousIndex == maxIndex) { + validUsage(programName); + exit(EXIT_FAILURE); + } + return ++previousIndex; +} + +void savePagerankToFile(char *filename, bool append, double *pagerankVector, + int vectorSize, int realIterations) { + FILE *outputFile; + + if (append) { + outputFile = fopen(filename, "a"); + } else { + outputFile = fopen(filename, "w"); + } + + if (outputFile == NULL) { + printf("Error while opening the output file.\n"); + return; + } + //Save numberofPages and convergence time + + for (int i=0; i +#include +#include +#include +#include + +#include "coo_sparse_matrix.h" + +/* ===== DEFINITIONS ===== */ + +//Colors used for better console output formating. +#define ANSI_COLOR_RED "\x1B[31m" +#define ANSI_COLOR_GREEN "\x1B[32m" +#define ANSI_COLOR_YELLOW "\x1B[33m" +#define ANSI_COLOR_BLUE "\x1B[34m" +#define ANSI_COLOR_CYAN "\x1B[36m" +#define ANSI_COLOR_RESET "\x1B[0m" + +/* ===== CONSTANTS DEFINITION ===== */ + +// Constant strings that store the command line options available. +extern const char *ARGUMENT_CONVERGENCE_TOLERANCE; +extern const char *ARGUMENT_MAX_ITERATIONS; +extern const char *ARGUMENT_DAMPING_FACTOR; +extern const char *ARGUMENT_VERBAL_OUTPUT; +extern const char *ARGUMENT_OUTPUT_HISTORY; +extern const char *ARGUMENT_OUTPUT_FILENAME; +// The numerical base used when parsing numerical command line arguments. +extern const int NUMERICAL_BASE; +// Default filename used for the output. +extern char *DEFAULT_OUTPUT_FILENAME; +// The size of the buffer used for reading the graph input file. +extern const int FILE_READ_BUFFER_SIZE; + +/* ===== STRUCTURES ===== */ + +// A data structure to conveniently hold the algorithm's parameters. +typedef struct parameters { + int numberOfPages, maxIterations, realIterations; + double convergenceCriterion, dampingFactor; + bool verbose, history; + char *outputFilename, *graphFilename; +} Parameters; + +/* ===== FUNCTION DEFINITIONS ===== */ + +// Function validUsage outputs the correct way to use the program with command +// line arguments. +void validUsage(char *programName); + +// Function checkIncrement is a helper function used in parseArguments (see +// bellow). +int checkIncrement(int previousIndex, int maxIndex, char *programName); + +// Function parseArguments parses command line arguments. +void parseArguments(int argumentCount, char **argumentVector, + Parameters *parameters); + +// Function generateNormalizedTransitionMatrixFromFile reads through the entries +// of the file specified in the arguments (parameters->graphFilename), using +// them to populate the sparse array (transitionMatrix). The entries of the file +// represent the edges of the web transition graph. The entries are then +// modified to become the rows of the transition matrix. +void generateNormalizedTransitionMatrixFromFile(CsrSparseMatrix *transitionMatrix, + Parameters *parameters); + +// Function savePagerankToFile appends or overwrites the pagerank vector +// "pagerankVector" to the file with the filename supplied in the arguments. +void savePagerankToFile(char *filename, bool append, double *pagerankVector, + int vectorSize, int realIterations); + +// Function initialize allocates memory for the pagerank vector, reads the +// dataset from the file and creates the transition probability distribution +// matrix. +void initialize(CsrSparseMatrix *transitionMatrix, double **pagerankVector, + Parameters *parameters); + +// Function vectorNorm calculates the first norm of a vector. +double vectorNorm(double *vector, int vectorSize); + +// Function calculateNextPagerank calculates the next pagerank vector. +void calculateNextPagerank(CsrSparseMatrix *transitionMatrix, + double *previousPagerankVector, double **pagerankVector, + double *linksFromConvergedPagesPagerankVector, + double *convergedPagerankVector, int vectorSize, double dampingFactor); + +// Function pagerank iteratively calculates the pagerank of each page until +// either the convergence criterion is met or the maximum number of iterations +// is reached. +int pagerank(CsrSparseMatrix *transitionMatrix, double **pagerankVector, + bool *convergenceStatus, Parameters parameters); + +#endif // SERIAL_GS_PAGERANK_FUNCTIONS_H \ No newline at end of file diff --git a/openmp/serial_gs_pagerank_functions.o b/openmp/serial_gs_pagerank_functions.o new file mode 100644 index 0000000..6e91ac7 Binary files /dev/null and b/openmp/serial_gs_pagerank_functions.o differ