diff --git a/serial_csr/csr_sparse_matrix.c b/serial_csr/csr_sparse_matrix.c new file mode 100644 index 0000000..1cacdf9 --- /dev/null +++ b/serial_csr/csr_sparse_matrix.c @@ -0,0 +1,279 @@ +#include "csr_sparse_matrix.h" + + +CsrSparseMatrix initCsrSparseMatrix() { + CsrSparseMatrix sparseMatrix; + sparseMatrix.size = 0; + sparseMatrix.nnz = 0; + sparseMatrix.values = NULL; + sparseMatrix.columnIndexes = NULL; + sparseMatrix.rowaccInd = NULL; + return sparseMatrix; +} + +void allocMemoryForElements (CsrSparseMatrix *sparseMatrix, int size, int nnz) { + sparseMatrix->values = (double *) malloc( + nnz * sizeof(double)); + sparseMatrix->columnIndexes = (int *) malloc( + nnz * sizeof(int)); + sparseMatrix->rowaccInd = (int *) malloc( + size * sizeof(int)); + sparseMatrix->nnz = nnz; + sparseMatrix->size = size; +} + +void addElements(CsrSparseMatrix *sparseMatrix, int *fileFromMatrix, int *fileToMatrix) { + printf("I am in add elements \n"); + //initialize + for(int i=0; isize; ++i){ + sparseMatrix->rowaccInd[i] = 0; + } + for(int i=0; innz; ++i){ + sparseMatrix->rowaccInd[fileFromMatrix[i]]++; + } + for(int i=1; isize; ++i){ + sparseMatrix->rowaccInd[i]+=sparseMatrix->rowaccInd[i-1]; + } + printf("I am in add elements 2\n"); + int k=0; + for(int i=0; isize; ++i){ + for(int j = 0; jnnz; ++j){ + if(fileFromMatrix[j] == i){ + do{ + sparseMatrix->values[sparseMatrix->rowaccInd[i-1]+k] = 1; + sparseMatrix->columnIndexes[sparseMatrix->rowaccInd[i-1]+k] = fileToMatrix[j]; + ++k; + }while(krowaccInd[i]-sparseMatrix->rowaccInd[i-1]); + k = 0; + j = sparseMatrix->nnz; + } + + } + + printf("I am in add elements %d\n", i); + } + printf("I finished add elements \n"); +} + + +void zeroOutRow(CsrSparseMatrix *sparseMatrix, int row) { + int noofnnzinrow; + if(row==0){ + noofnnzinrow = sparseMatrix->rowaccInd[row]; + } + else{ + noofnnzinrow = sparseMatrix->rowaccInd[row]-sparseMatrix->rowaccInd[row-1]; + } + int startdeleteInd = sparseMatrix->rowaccInd[row-1]+1; + + //delete the 
values and columnindexes of these rows by moving up the rest + for(int i=0; ivalues[i+startdeleteInd] = sparseMatrix->values[sparseMatrix->nnz-noofnnzinrow+i]; + sparseMatrix->values[sparseMatrix->nnz-noofnnzinrow+i] = 0; + sparseMatrix->columnIndexes[i+startdeleteInd] = sparseMatrix->columnIndexes[sparseMatrix->nnz-noofnnzinrow+i]; + sparseMatrix->columnIndexes[sparseMatrix->nnz-noofnnzinrow+i] = 0; + } + sparseMatrix->nnz = sparseMatrix->nnz - noofnnzinrow; + + //substract from accumulative no. of row nnz elements + for(int i=row; isize ; ++i){ + sparseMatrix->rowaccInd[i] -= noofnnzinrow; + } + + /*for (int i=0; isize; ++i) { + CooSparseMatrixElement *element = sparseMatrix->elements[i]; + if (element->rowIndex == row) { + element->value = 0; + } + }*/ +} + + +void zeroOutColumn(CsrSparseMatrix *sparseMatrix, int column) { + + /*for (int i=0; isize; ++i) { + CooSparseMatrixElement *element = sparseMatrix->elements[i]; + if (element->columnIndex == column) { + element->value = 0; + } + } + */ + for (int i=0; innz; ++i){ + if(sparseMatrix->columnIndexes[i] == column){ + //delete columns by moving up the rest + for(int j=i; jnnz-1; ++j){ + sparseMatrix->columnIndexes[j] = sparseMatrix->columnIndexes[j+1]; + sparseMatrix->values[j] = sparseMatrix->values[j+1]; + } + int flag = 0; + //adjust rowaccInd + for(int j=0; jsize; ++j){ + if(sparseMatrix->rowaccInd[j] > i){ + flag = 1; //must be substracted since column belonged to this row + } + if(flag){ + --sparseMatrix->rowaccInd[j]; //substract till end of rows + } + } + + } + } + +} + +int *getRowIndexes(CsrSparseMatrix sparseMatrix, int row, int *rowSize) { + *rowSize = 0; + /*for (int i=0; irowIndex == row) { + ++(*rowSize); + } + } + + if (!(*rowSize)) { + return NULL; + }*/ + if((row-1)>0 && (sparseMatrix.rowaccInd[row]-sparseMatrix.rowaccInd[row-1])>0){ + (*rowSize) = sparseMatrix.rowaccInd[row]-sparseMatrix.rowaccInd[row-1]; + } + else if((sparseMatrix.rowaccInd[row]-sparseMatrix.rowaccInd[row-1])>0){ //if row = 
0 + (*rowSize) = sparseMatrix.rowaccInd[row]; + } + else{ + return NULL; + } + + int *indexes = (int *) malloc((*rowSize) * sizeof(int)); + for (int i=1; i<=(*rowSize); ++i) { + + indexes[i-1] = sparseMatrix.rowaccInd[row-1]+i; + + } + + return indexes; +} + +void transposeSparseMatrix(CsrSparseMatrix *sparseMatrix) { + /*for (int i=0; isize; ++i) { + CooSparseMatrixElement *element = sparseMatrix->elements[i]; + int tempRow = element->rowIndex; + element->rowIndex = element->columnIndex; + element->columnIndex = tempRow; + }*/ + double* values_t = (double *) malloc( + sparseMatrix->size * sizeof(double)); + int* rowIndexes = (int *) malloc( + sparseMatrix->size * sizeof(int)); + int* colaccInd = (int *) malloc( + sparseMatrix->size * sizeof(int)); + + + + int columncount, nnznew = 0; + //for all columns + for(columncount = 0; columncountsize; ++columncount){ + //index for searching in columnIndexes matrix + for(int i = 0; innz;++i){ + if(sparseMatrix->columnIndexes[i] == columncount){ + //Find which row it belongs to + for(int j=0; jsize; ++j){ + if(sparseMatrix->rowaccInd[j] == i){ + rowIndexes[nnznew] = j-1; + values_t[nnznew] = sparseMatrix->values[i]; + for(int k=i; ksize; ++k){ + ++colaccInd[k]; + } + ++nnznew; + } + } + + } + } + } + + memcpy(sparseMatrix->values, values_t, sparseMatrix->size*sizeof(double)); + memcpy(sparseMatrix->columnIndexes, rowIndexes, sparseMatrix->size*sizeof(int)); + memcpy(sparseMatrix->rowaccInd, colaccInd, sparseMatrix->size*sizeof(int) ); + sparseMatrix->nnz = nnznew; +} + +void csrSparseMatrixVectorMultiplication(CsrSparseMatrix sparseMatrix, + double *vector, double **product, int vectorSize) { + // Initializes the elements of the product vector to zero + for (int i=0; irowIndex, column = element->columnIndex; + + if (row >= vectorSize) { + printf("Error at sparseMatrixVectorMultiplication. 
Matrix has more rows than vector!\n"); + printf("row = %d\n", row); + exit(EXIT_FAILURE); + } + + (*product)[row] = (*product)[row] + element->value * vector[column]; + }*/ + int t; + //for every row + for (int i=0; ik){ + printf("Error at sparseMatrixVectorMultiplication. Matrix has more columns than vector rows!\n"); + exit(EXIT_FAILURE); + } + } + + } + } + + +} + +void destroyCsrSparseMatrix(CsrSparseMatrix *sparseMatrix) { + /*for (int i=0; isize; ++i) { + free(sparseMatrix->elements[i]); + }*/ + free(sparseMatrix->values); + free(sparseMatrix->rowaccInd); + free(sparseMatrix->columnIndexes); + +} + +void printCsrSparseMatrix(CsrSparseMatrix sparseMatrix) { + if (sparseMatrix.size == 0) { + return; + } + /* + CooSparseMatrixElement *element; + for (int i=0; irowIndex, element->columnIndex, + element->value); + }*/ + int t; + for (int i=0; i +#include +#include +#include +#include + +typedef struct csrSparseMatrix { + double* values; + int* rowaccInd; //without the first cell, always 0 + int* columnIndexes; + int size; //no. of rows + int nnz; //no. 
of non zero elements + +} CsrSparseMatrix; + +CsrSparseMatrix initCsrSparseMatrix(); +void allocMemoryForElements (CsrSparseMatrix *sparseMatrix, int size, int nnz); +void addElements(CsrSparseMatrix *sparseMatrix, int *fileFromMatrix, int *FileToMatrix); +void zeroOutRow(CsrSparseMatrix *sparseMatrix, int row); +void zeroOutColumn(CsrSparseMatrix *sparseMatrix, int column); +int *getRowIndexes(CsrSparseMatrix sparseMatrix, int row, int *rowSize); +void transposeSparseMatrix(CsrSparseMatrix *sparseMatrix); +void csrSparseMatrixVectorMultiplication(CsrSparseMatrix sparseMatrix, double *vector, + double **product, int vectorSize); +void destroyCsrSparseMatrix(CsrSparseMatrix *sparseMatrix); +void printCsrSparseMatrix(CsrSparseMatrix sparseMatrix); + +#endif // CSR_SPARSE_MATRIX_H \ No newline at end of file diff --git a/serial_csr/serial_gs_pagerank_functions.c b/serial_csr/serial_gs_pagerank_functions.c new file mode 100644 index 0000000..cac91b2 --- /dev/null +++ b/serial_csr/serial_gs_pagerank_functions.c @@ -0,0 +1,506 @@ +/* ===== INCLUDES ===== */ + +#include "serial_gs_pagerank_functions.h" + +/* ===== CONSTANTS ===== */ + +const char *ARGUMENT_CONVERGENCE_TOLERANCE = "-c"; +const char *ARGUMENT_MAX_ITERATIONS = "-m"; +const char *ARGUMENT_DAMPING_FACTOR = "-a"; +const char *ARGUMENT_VERBAL_OUTPUT = "-v"; +const char *ARGUMENT_OUTPUT_HISTORY = "-h"; +const char *ARGUMENT_OUTPUT_FILENAME = "-o"; + +const int NUMERICAL_BASE = 10; +char *DEFAULT_OUTPUT_FILENAME = "pagerank_output"; +const int FILE_READ_BUFFER_SIZE = 4096; + +const int CONVERGENCE_CHECK_ITERATION_PERIOD = 3; +const int SPARSITY_INCREASE_ITERATION_PERIOD = 9; + +/* ===== FUNCTIONS ===== */ + +int pagerank(CsrSparseMatrix *transitionMatrix, double **pagerankVector, + bool *convergenceStatus, Parameters parameters) { + // Variables declaration + int iterations = 0, numberOfPages = parameters.numberOfPages; + double delta, *pagerankDifference, *previousPagerankVector, + *convergedPagerankVector, 
*linksFromConvergedPagesPagerankVector; + LilSparseMatrix linksFromConvergedPages = createLilSparseMatrix(); + bool *convergenceMatrix; + + // Space allocation + { + size_t sizeofDouble = sizeof(double); + // pagerankDifference used to calculate delta + pagerankDifference = (double *) malloc(numberOfPages * sizeofDouble); + // previousPagerankVector holds last iteration's pagerank vector + previousPagerankVector = (double *) malloc(numberOfPages * sizeofDouble); + // convergedPagerankVector is the pagerank vector of converged pages only + convergedPagerankVector = (double *) malloc(numberOfPages * sizeofDouble); + // linksFromConvergedPagesPagerankVector holds the partial sum of the + // pagerank vector, that describes effect of the links from converged + // pages to non converged pages + linksFromConvergedPagesPagerankVector = (double *) malloc(numberOfPages * sizeofDouble); + // convergenceMatrix indicates which pages have converged + convergenceMatrix = (bool *) malloc(numberOfPages * sizeof(bool)); + *convergenceStatus = false; + + // Initialization + for (int i=0; ielements[rowIndexes[j]]; + // Checks for links from converged pages to non converged + int pageLinksTo = element->columnIndex; + if (convergenceMatrix[pageLinksTo] == false){ + // Link exists, adds element to the vector + apendElement(&linksFromConvergedPages, + element->value, i, pageLinksTo); + }*/ + int pageLinksTo = transitionMatrix->columnIndexes[rowIndexes[j]]; + if (convergenceMatrix[pageLinksTo] == false){ + // Link exists, adds element to the vector + apendElement(&linksFromConvergedPages, + transitionMatrix->values[rowIndexes[j]], i, pageLinksTo); + } + + } + + // Increases sparsity of the transition matrix by + // deleting elements that correspond to converged pages + zeroOutRow(transitionMatrix, i); + zeroOutColumn(transitionMatrix, i); + + // Builds the new linksFromConvergedPagesPagerankVector + lilSparseMatrixVectorMultiplication(linksFromConvergedPages, + *pagerankVector, 
&linksFromConvergedPagesPagerankVector, + numberOfPages); + } + } + free(newlyConvergedPages); + } + + ++iterations; + // Outputs information about this iteration + if (iterations%2) { + printf(ANSI_COLOR_BLUE "Iteration %d: delta = %f\n" ANSI_COLOR_RESET, iterations, delta); + } else { + printf(ANSI_COLOR_CYAN "Iteration %d: delta = %f\n" ANSI_COLOR_RESET, iterations, delta); + } + } while (!*convergenceStatus && (parameters.maxIterations == 0 || + iterations < parameters.maxIterations)); + + if (!parameters.history) { + // Outputs last pagerank vector to file + savePagerankToFile(parameters.outputFilename, false, *pagerankVector, numberOfPages); + } + + // Frees memory + free(pagerankDifference); + free(previousPagerankVector); + free(convergedPagerankVector); + free(linksFromConvergedPagesPagerankVector); + free(convergenceMatrix); + destroyLilSparseMatrix(&linksFromConvergedPages); + + return iterations; +} + +/* + * initialize allocates required memory for arrays, reads the web graph from the + * from the file and creates the initial transition probability distribution + * matrix. 
+*/ +void initialize(CsrSparseMatrix *transitionMatrix, + double **pagerankVector, Parameters *parameters) { + + // Reads web graph from file + if ((*parameters).verbose) { + printf(ANSI_COLOR_YELLOW "----- Reading graph from file -----\n" ANSI_COLOR_RESET); + } + generateNormalizedTransitionMatrixFromFile(transitionMatrix, parameters); + + // Outputs the algorithm parameters to the console + if ((*parameters).verbose) { + printf(ANSI_COLOR_YELLOW "\n----- Running with parameters -----\n" ANSI_COLOR_RESET\ + "Number of pages: %d", (*parameters).numberOfPages); + if (!(*parameters).maxIterations) { + printf("\nMaximum number of iterations: inf"); + } else { + printf("\nMaximum number of iterations: %d", (*parameters).maxIterations); + } + printf("\nConvergence criterion: %f" \ + "\nDamping factor: %f" \ + "\nGraph filename: %s\n", (*parameters).convergenceCriterion, + (*parameters).dampingFactor, (*parameters).graphFilename); + } + + // Allocates memory for the pagerank vector + (*pagerankVector) = (double *) malloc((*parameters).numberOfPages * sizeof(double)); + double webUniformProbability = 1. / (*parameters).numberOfPages; + for (int i=0; i<(*parameters).numberOfPages; ++i) { + (*pagerankVector)[i] = webUniformProbability; + } + + // Transposes the transition matrix (P^T). + transposeSparseMatrix(transitionMatrix); +} + +// ==================== MATH UTILS ==================== + +/* + * calculateNextPagerank calculates the product of the multiplication + * between a matrix and the a vector in a cheap way. +*/ +void calculateNextPagerank(CsrSparseMatrix *transitionMatrix, + double *previousPagerankVector, double **pagerankVector, + double *linksFromConvergedPagesPagerankVector, + double *convergedPagerankVector, int vectorSize, double dampingFactor) { + // Calculates the web uniform probability once. + double webUniformProbability = 1. 
/ vectorSize; + + csrSparseMatrixVectorMultiplication(*transitionMatrix, previousPagerankVector, + pagerankVector, vectorSize); + + for (int i=0; i 10) { + validUsage(argumentVector[0]); + } + + (*parameters).numberOfPages = 0; + (*parameters).maxIterations = 0; + (*parameters).convergenceCriterion = 1; + (*parameters).dampingFactor = 0.85; + (*parameters).verbose = false; + (*parameters).history = false; + (*parameters).outputFilename = DEFAULT_OUTPUT_FILENAME; + + char *endPointer; + int argumentIndex = 1; + + while (argumentIndex < argumentCount) { + if (!strcmp(argumentVector[argumentIndex], ARGUMENT_CONVERGENCE_TOLERANCE)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + double convergenceInput = strtod(argumentVector[argumentIndex], &endPointer); + if (convergenceInput == 0) { + printf("Invalid convergence argument\n"); + exit(EXIT_FAILURE); + } + (*parameters).convergenceCriterion = convergenceInput; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_MAX_ITERATIONS)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + size_t iterationsInput = strtol(argumentVector[argumentIndex], &endPointer, NUMERICAL_BASE); + if (iterationsInput == 0 && endPointer) { + printf("Invalid iterations argument\n"); + exit(EXIT_FAILURE); + } + (*parameters).maxIterations = iterationsInput; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_DAMPING_FACTOR)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + double alphaInput = strtod(argumentVector[argumentIndex], &endPointer); + if ((alphaInput == 0 || alphaInput > 1) && endPointer) { + printf("Invalid alpha argument\n"); + exit(EXIT_FAILURE); + } + (*parameters).dampingFactor = alphaInput; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_VERBAL_OUTPUT)) { + (*parameters).verbose = true; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_OUTPUT_HISTORY)) { + 
(*parameters).history = true; + } else if (!strcmp(argumentVector[argumentIndex], ARGUMENT_OUTPUT_FILENAME)) { + argumentIndex = checkIncrement(argumentIndex, argumentCount, argumentVector[0]); + + if (fopen(argumentVector[argumentIndex], "w") == NULL) { + printf("Invalid output filename. Reverting to default.\n"); + continue; + } + (*parameters).outputFilename = argumentVector[argumentIndex]; + } else if (argumentIndex == argumentCount - 1) { + (*parameters).graphFilename = argumentVector[argumentIndex]; + } else { + validUsage(argumentVector[0]); + exit(EXIT_FAILURE); + } + ++argumentIndex; + } +} + +/* + * readGraphFromFile loads the file supplied in the command line arguments to an + * array (directedWebGraph) that represents the graph. +*/ +void generateNormalizedTransitionMatrixFromFile(CsrSparseMatrix *transitionMatrix, + Parameters *parameters){ + FILE *graphFile; + + // Opens the file for reading + graphFile = fopen((*parameters).graphFilename, "r+"); + if (!graphFile) { + printf("Error opening file \n"); + exit(EXIT_FAILURE); + } + + char buffer[FILE_READ_BUFFER_SIZE]; + char *readResult; + // Skips the first two lines + readResult = fgets(buffer, FILE_READ_BUFFER_SIZE, graphFile); + readResult = fgets(buffer, FILE_READ_BUFFER_SIZE, graphFile); + if (readResult == NULL) { + printf("Error while reading from the file. Does the file have the correct format?\n"); + exit(EXIT_FAILURE); + } + + // Third line contains the numbers of nodes and edges + int numberOfNodes = 0, numberOfEdges = 0; + + readResult = fgets(buffer, FILE_READ_BUFFER_SIZE, graphFile); + if (readResult == NULL) { + printf("Error while reading from the file. 
Does the file have the correct format?\n"); + exit(EXIT_FAILURE); + } + + // Parses the number of nodes and number of edges + { + // Splits string to whitespace + char *token = strtok(buffer, " "); + bool nextIsNodes = false, nextIsEdges = false; + + while (token != NULL) { + if (strcmp(token, "Nodes:") == 0) { + nextIsNodes = true; + } else if (nextIsNodes) { + numberOfNodes = atoi(token); + nextIsNodes = false; + } else if (strcmp(token, "Edges:") == 0) { + nextIsEdges = true; + } else if (nextIsEdges) { + numberOfEdges = atoi(token); + break; + } + + // Gets next string token + token = strtok (NULL, " ,.-"); + } + } + + if ((*parameters).verbose) { + printf("File claims number of pages is: %d\nThe number of edges is: %d\n", + numberOfNodes, numberOfEdges); + } + + // Skips the fourth line + readResult = fgets(buffer, 512, graphFile); + if (readResult == NULL) { + printf("Error while reading from the file. Does the file have the correct format?\n"); + exit(EXIT_FAILURE); + } + + int tenPercentIncrements = (int) numberOfEdges/10; + int maxPageIndex = 0; + int* fileToMatrix = malloc(numberOfEdges*sizeof(int)); + int* fileFromMatrix = malloc(numberOfEdges*sizeof(int)); + for (int i=0; i maxPageIndex) { + maxPageIndex = fileFrom; + } + if (fileTo > maxPageIndex) { + maxPageIndex = fileTo; + } + //addElement(transitionMatrix, 1, fileFrom, fileTo); + + } + printf("\n"); + if ((*parameters).verbose) { + printf("Max page index found is: %d\n", maxPageIndex); + } + (*parameters).numberOfPages = maxPageIndex + 1; + + + allocMemoryForElements(transitionMatrix, (*parameters).numberOfPages, numberOfEdges); + addElements(transitionMatrix, fileFromMatrix, fileToMatrix); + // Calculates the outdegree of each page and assigns the uniform probability + // of transition to the elements of the corresponding row + + int pageOutdegree = 1; + + + for(int i=0; isize; ++i){ + if(i==0){ + pageOutdegree+=transitionMatrix->rowaccInd[i]; + } + else{ + 
/*
 * validUsage outputs a message to the console that informs the user of the
 * correct (valid) way to use the program and then terminates the program with
 * EXIT_FAILURE. It never returns.
 */
void validUsage(char *programName) {
    printf("%s [-c convergence_criterion] [-m max_iterations] [-a alpha] [-v] [-h] [-o output_filename] "
        "\n-c convergence_criterion"
        "\n\tthe convergence tolerance criterion"
        "\n-m max_iterations"
        "\n\tmaximum number of iterations to perform"
        "\n-a alpha"
        "\n\tthe damping factor"
        "\n-v enable verbal output"
        "\n-h enable history output to file"
        "\n-o output_filename"
        "\n\tfilename and path for the output"
        "\n", programName);
    exit(EXIT_FAILURE);
}

/*
 * checkIncrement is a helper function for parseArguments function. It is
 * called on an option that takes a value and returns the index at which that
 * value is found.
 *
 * previousIndex is the index of the option itself and maxIndex the number of
 * arguments, so valid indexes are 0 .. maxIndex - 1. When the option is the
 * last argument there is no value left to read: the usage message is printed
 * and the program terminates instead of letting the caller index past the end
 * of the argument vector.
 */
int checkIncrement(int previousIndex, int maxIndex, char *programName) {
    // The value would sit at previousIndex + 1, which must still be a valid
    // index. Written as a comparison against maxIndex - 1 so that the check
    // itself cannot overflow.
    if (previousIndex >= maxIndex - 1) {
        validUsage(programName);
        exit(EXIT_FAILURE);
    }
    return previousIndex + 1;
}