Browse Source

Replace last matrices with SparseMatrix, Critical fix for file reading, Remove incompatible datasets

master
Apostolos Fanakis 6 years ago
parent
commit
bb50262865
No known key found for this signature in database GPG Key ID: 56CE2DEDE9F1FB78
  1. 10
      datasets/University of Toronto/README.md
  2. 7399
      datasets/University of Toronto/_architecture/graph/adj_list
  3. 5354
      datasets/University of Toronto/_blues/graph/adj_list
  4. 11659
      datasets/University of Toronto/_search_engines/graph/adj_list
  5. 8011
      datasets/University of Toronto/_weather/graph/adj_list
  6. 32
      serial/serial_gs_pagerank_functions.c

10
datasets/University of Toronto/README.md

@ -1,10 +0,0 @@
The datasets in this folder were downloaded from the website of the computer science course at the University of Toronto, found [here](http://www.cs.toronto.edu/~tsap/experiments/download/download.html).
More details about the datasets can be found in the table below.
| Dataset directory | Description | URL link |
| ----------- | ----------- | ----------- |
| \_architecture | "architecture" | [link](http://www.cs.toronto.edu/~tsap/experiments/download/_architecture.tar.Z) |
| \_blues | "blues" | [link](http://www.cs.toronto.edu/~tsap/experiments/download/_blues.tar.Z) |
| \_search_engines | "search_engines" | [link](http://www.cs.toronto.edu/~tsap/experiments/download/_search_engines.tar.Z) |
| \_weather | "weather" | [link](http://www.cs.toronto.edu/~tsap/experiments/download/_weather.tar.Z) |

7399
datasets/University of Toronto/_architecture/graph/adj_list

File diff suppressed because one or more lines are too long

5354
datasets/University of Toronto/_blues/graph/adj_list

File diff suppressed because it is too large

11659
datasets/University of Toronto/_search_engines/graph/adj_list

File diff suppressed because one or more lines are too long

8011
datasets/University of Toronto/_weather/graph/adj_list

File diff suppressed because it is too large

32
serial/serial_gs_pagerank_functions.c

@ -20,8 +20,8 @@ int pagerank(SparseMatrix *transitionMatrix, double **pagerankVector,
*vectorDifference = (double *) malloc(parameters.numberOfPages * sizeof(double)), *vectorDifference = (double *) malloc(parameters.numberOfPages * sizeof(double)),
*previousPagerankVector = (double *) malloc(parameters.numberOfPages * sizeof(double)), *previousPagerankVector = (double *) malloc(parameters.numberOfPages * sizeof(double)),
*convergedPagerankVector = (double *) malloc(parameters.numberOfPages * sizeof(double)), *convergedPagerankVector = (double *) malloc(parameters.numberOfPages * sizeof(double)),
**linksFromConvergedPages = (double **) malloc(parameters.numberOfPages * sizeof(double *)),
*linksFromConvergedPagesPagerankVector = (double *) malloc(parameters.numberOfPages * sizeof(double)); *linksFromConvergedPagesPagerankVector = (double *) malloc(parameters.numberOfPages * sizeof(double));
SparseMatrix linksFromConvergedPages = createSparseMatrix();
bool *converganceMatrix = (bool *) malloc(parameters.numberOfPages * sizeof(bool)); bool *converganceMatrix = (bool *) malloc(parameters.numberOfPages * sizeof(bool));
*convergenceStatus = false; *convergenceStatus = false;
@ -29,11 +29,6 @@ int pagerank(SparseMatrix *transitionMatrix, double **pagerankVector,
convergedPagerankVector[i] = 0; convergedPagerankVector[i] = 0;
converganceMatrix[i] = false; converganceMatrix[i] = false;
linksFromConvergedPagesPagerankVector[i] = 0; linksFromConvergedPagesPagerankVector[i] = 0;
linksFromConvergedPages[i] = (double *) malloc(parameters.numberOfPages * sizeof(double));
for (int j=0; j<parameters.numberOfPages; ++j) {
linksFromConvergedPages[i][j] = 0;
}
} }
if (parameters.verbose) { if (parameters.verbose) {
@ -63,7 +58,7 @@ int pagerank(SparseMatrix *transitionMatrix, double **pagerankVector,
*convergenceStatus = true; *convergenceStatus = true;
} }
if (iterations && !iterations % 10) { if (iterations && (!iterations % 10)) {
for (int i=0; i<parameters.numberOfPages; ++i) { for (int i=0; i<parameters.numberOfPages; ++i) {
double temp = fabs((*pagerankVector)[i] - previousPagerankVector[i]) / fabs(previousPagerankVector[i]); double temp = fabs((*pagerankVector)[i] - previousPagerankVector[i]) / fabs(previousPagerankVector[i]);
if (temp < parameters.convergenceCriterion){ if (temp < parameters.convergenceCriterion){
@ -77,17 +72,16 @@ int pagerank(SparseMatrix *transitionMatrix, double **pagerankVector,
for (int j=0; j<parameters.numberOfPages; ++j){ for (int j=0; j<parameters.numberOfPages; ++j){
if (converganceMatrix[j] == false){ if (converganceMatrix[j] == false){
SparseMatrixElement *element = getElement(*transitionMatrix, i, j); SparseMatrixElement *element = getElement(*transitionMatrix, i, j);
linksFromConvergedPages[i][j] = element != NULL ? element->value : 0; if (element != NULL) {
apendElement(&linksFromConvergedPages, element->value, i, j);
}
} }
deleteElement(transitionMatrix, i, j); deleteElement(transitionMatrix, i, j);
deleteElement(transitionMatrix, j, i); deleteElement(transitionMatrix, j, i);
} }
double sum = 0; sparseMatrixVectorMultiplication(linksFromConvergedPages, *pagerankVector,
for (int j=0; j<parameters.numberOfPages; ++j) { &linksFromConvergedPagesPagerankVector, parameters.numberOfPages);
sum += linksFromConvergedPages[i][j] * (*pagerankVector)[j];
}
linksFromConvergedPagesPagerankVector[i] = sum;
} }
} }
} }
@ -319,10 +313,9 @@ void generateNormalizedTransitionMatrixFromFile(SparseMatrix *transitionMatrix,
} }
if ((*parameters).verbose) { if ((*parameters).verbose) {
printf("The number of pages is: %d\nThe number of edges is: %d\n", printf("File claims number of pages is: %d\nThe number of edges is: %d\n",
numberOfNodes, numberOfEdges); numberOfNodes, numberOfEdges);
} }
(*parameters).numberOfPages = numberOfNodes;
// Skips the fourth line // Skips the fourth line
readResult = fgets(buffer, 512, graphFile); readResult = fgets(buffer, 512, graphFile);
@ -334,6 +327,7 @@ void generateNormalizedTransitionMatrixFromFile(SparseMatrix *transitionMatrix,
printf("SIZE OF STRUCT = %lu Bytes\n", sizeof(SparseMatrixElement)); printf("SIZE OF STRUCT = %lu Bytes\n", sizeof(SparseMatrixElement));
int fivePercentIncrements = (int) numberOfEdges/20; int fivePercentIncrements = (int) numberOfEdges/20;
int maxPageIndex = 0;
fivePercentIncrements = fivePercentIncrements != 0 ? fivePercentIncrements : 1; fivePercentIncrements = fivePercentIncrements != 0 ? fivePercentIncrements : 1;
for (int i=0; i<numberOfEdges; i++) { for (int i=0; i<numberOfEdges; i++) {
@ -352,9 +346,17 @@ void generateNormalizedTransitionMatrixFromFile(SparseMatrix *transitionMatrix,
break; break;
} }
if (fileFrom > maxPageIndex) {
maxPageIndex = fileFrom;
}
apendElement(transitionMatrix, 1, fileFrom, fileTo); apendElement(transitionMatrix, 1, fileFrom, fileTo);
} }
if ((*parameters).verbose) {
printf("Max page index found is: %d\n", maxPageIndex);
}
(*parameters).numberOfPages = maxPageIndex + 1;
// Calculates the outdegree of each page and assigns the uniform probability // Calculates the outdegree of each page and assigns the uniform probability
// of transition to the elements of the corresponding row // of transition to the elements of the corresponding row
int currentRow = transitionMatrix->firstElement->rowIndex; int currentRow = transitionMatrix->firstElement->rowIndex;

Loading…
Cancel
Save