Browse Source

Implement filter-based adaptive algorithm, Add bigger datasets

master
Apostolos Fanakis 6 years ago
parent
commit
d7dc57ea58
No known key found for this signature in database GPG Key ID: 56CE2DEDE9F1FB78
  1. 2
      .gitignore
  2. 7399
      datasets/_architecture/graph/adj_list
  3. 36996
      datasets/_architecture/graph/nodes
  4. 5354
      datasets/_blues/graph/adj_list
  5. 26771
      datasets/_blues/graph/nodes
  6. 11659
      datasets/_search_engines/graph/adj_list
  7. 58296
      datasets/_search_engines/graph/nodes
  8. 8011
      datasets/_weather/graph/adj_list
  9. 40056
      datasets/_weather/graph/nodes
  10. 80
      datasets/list2matrix.c
  11. 26
      serial/serial_gs_pagerank_functions.c
  12. 3
      serial/serial_gs_pagerank_functions.h

2
.gitignore

@ -53,3 +53,5 @@ dkms.conf
#Output files #Output files
*_output *_output
adj_matrix

7399
datasets/_architecture/graph/adj_list

File diff suppressed because one or more lines are too long

36996
datasets/_architecture/graph/nodes

File diff suppressed because it is too large

5354
datasets/_blues/graph/adj_list

File diff suppressed because it is too large

26771
datasets/_blues/graph/nodes

File diff suppressed because it is too large

11659
datasets/_search_engines/graph/adj_list

File diff suppressed because one or more lines are too long

58296
datasets/_search_engines/graph/nodes

File diff suppressed because it is too large

8011
datasets/_weather/graph/adj_list

File diff suppressed because it is too large

40056
datasets/_weather/graph/nodes

File diff suppressed because it is too large

80
datasets/list2matrix.c

@ -0,0 +1,80 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Builds "<dir>/graph/<leaf>" into dst (capacity cap bytes).
 * Returns 0 on success, -1 if formatting failed or the result was truncated. */
static int build_graph_path(char *dst, size_t cap, const char *dir, const char *leaf){
    int len = snprintf(dst, cap, "%s/graph/%s", dir, leaf);
    return (len < 0 || (size_t)len >= cap) ? -1 : 0;
}

/***
The argument for the program is the directory name
of the query for which we want to create the adjacency matrix.

Input:
  <dir>/graph/nodes     - the first integer is taken as N, the number of nodes.
  <dir>/graph/adj_list  - N rows of the form "<id>: <j1> <j2> ... -1"; the
                          leading id is skipped, every j marks an edge from
                          the current row to column j, and -1 ends the row.
Output:
  <dir>/graph/adj_matrix - N lines, each holding N entries (0 or 1), every
                           entry followed by a single space.

Returns EXIT_SUCCESS when the matrix was written, EXIT_FAILURE on a usage
error, an unreadable/unwritable file, malformed input, or lack of memory.
***/
int main(int argc, char **argv){
    char nodes_file[1000];
    char list_file[1000];
    char matrix_file[1000];
    FILE *fnodes = NULL;
    FILE *flist = NULL;
    FILE *fmatrix = NULL;
    unsigned char *E = NULL;   /* N x N adjacency matrix, stored row by row */
    const char *path;
    int N = 0;                 /* number of nodes */
    int i, j;
    int status = EXIT_FAILURE;

    if (argc != 2){
        fprintf(stderr, "list2matrix query_directory\n");
        return EXIT_FAILURE;
    }
    path = argv[1];

    /* Reject directory names that would not fit in the fixed-size buffers. */
    if (build_graph_path(nodes_file, sizeof nodes_file, path, "nodes") != 0 ||
        build_graph_path(list_file, sizeof list_file, path, "adj_list") != 0 ||
        build_graph_path(matrix_file, sizeof matrix_file, path, "adj_matrix") != 0){
        fprintf(stderr, "ERROR: query directory name is too long\n");
        return EXIT_FAILURE;
    }

    /*** open the nodes file to obtain the number of nodes ***/
    fnodes = fopen(nodes_file, "r");
    if (fnodes == NULL){
        fprintf(stderr, "ERROR: Cant open file %s\n", nodes_file);
        return EXIT_FAILURE;
    }
    if (fscanf(fnodes, "%d", &N) != 1 || N < 0){
        fprintf(stderr, "ERROR: Cant read a valid number of nodes from %s\n", nodes_file);
        fclose(fnodes);
        return EXIT_FAILURE;
    }
    fclose(fnodes);

    /**** Read List and Construct the adjacency matrix E ****/
    if (N > 0){
        /* calloc zero-fills the matrix and rejects an overflowing N * N. */
        E = calloc((size_t)N, (size_t)N * sizeof *E);
        if (E == NULL){
            fprintf(stderr, "ERROR: Cant allocate a %d x %d matrix\n", N, N);
            return EXIT_FAILURE;
        }
    }

    flist = fopen(list_file, "r");
    if (flist == NULL){
        fprintf(stderr, "ERROR: Cant open file %s\n", list_file);
        goto cleanup;
    }
    for (i = 0; i < N; i++){
        /* Skip the "<id>:" prefix and fetch the first neighbour (or -1). */
        if (fscanf(flist, "%*d: %d", &j) != 1){
            fprintf(stderr, "ERROR: malformed or missing row %d in %s\n", i, list_file);
            goto cleanup;
        }
        while (j != -1){
            if (j < 0 || j >= N){
                fprintf(stderr, "ERROR: node index %d out of range in row %d of %s\n",
                        j, i, list_file);
                goto cleanup;
            }
            E[(size_t)i * (size_t)N + (size_t)j] = 1;
            /* A failed read would leave j unchanged and loop forever. */
            if (fscanf(flist, "%d", &j) != 1){
                fprintf(stderr, "ERROR: row %d in %s is not terminated by -1\n", i, list_file);
                goto cleanup;
            }
        }
    }
    fclose(flist);
    flist = NULL;

    /*** print the adjacency matrix ***/
    fmatrix = fopen(matrix_file, "w");
    if (fmatrix == NULL){
        fprintf(stderr, "ERROR: Cant open file %s\n", matrix_file);
        goto cleanup;
    }
    for (i = 0; i < N; i++){
        for (j = 0; j < N; j++){
            fprintf(fmatrix, "%d ", E[(size_t)i * (size_t)N + (size_t)j]);
        }
        fprintf(fmatrix, "\n");
    }
    if (ferror(fmatrix)){
        fprintf(stderr, "ERROR: Cant write file %s\n", matrix_file);
        goto cleanup;
    }
    /* fclose flushes buffered output, so its result decides success. */
    if (fclose(fmatrix) != 0){
        fmatrix = NULL;
        fprintf(stderr, "ERROR: Cant write file %s\n", matrix_file);
        goto cleanup;
    }
    fmatrix = NULL;
    status = EXIT_SUCCESS;

cleanup:
    if (fmatrix != NULL){
        fclose(fmatrix);
    }
    if (flist != NULL){
        fclose(flist);
    }
    free(E);
    return status;
}

26
serial/serial_gs_pagerank_functions.c

@ -16,7 +16,12 @@ int pagerank(double ***transitionMatrix, double **pagerankVector, Parameters par
int iterations = 0; int iterations = 0;
double delta, double delta,
*vectorDifference = (double *) malloc(parameters.numberOfPages * sizeof(double)), *vectorDifference = (double *) malloc(parameters.numberOfPages * sizeof(double)),
*previousPagerankVector = (double *) malloc(parameters.numberOfPages * sizeof(double)); *previousPagerankVector = (double *) malloc(parameters.numberOfPages * sizeof(double)),
*convergedPagerankVector = (double *) malloc(parameters.numberOfPages * sizeof(double));
for (int i=0; i<parameters.numberOfPages; ++i) {
convergedPagerankVector[i] = 0;
}
if (parameters.verbose) { if (parameters.verbose) {
printf("\n----- Starting iterations -----\n"); printf("\n----- Starting iterations -----\n");
@ -25,7 +30,7 @@ int pagerank(double ***transitionMatrix, double **pagerankVector, Parameters par
do { do {
memcpy(previousPagerankVector, *pagerankVector, parameters.numberOfPages * sizeof(double)); memcpy(previousPagerankVector, *pagerankVector, parameters.numberOfPages * sizeof(double));
matrixVectorMultiplication(transitionMatrix, previousPagerankVector, matrixVectorMultiplication(transitionMatrix, previousPagerankVector, convergedPagerankVector,
pagerankVector, parameters.numberOfPages, parameters.dampingFactor); pagerankVector, parameters.numberOfPages, parameters.dampingFactor);
if (parameters.history) { if (parameters.history) {
@ -38,6 +43,18 @@ int pagerank(double ***transitionMatrix, double **pagerankVector, Parameters par
} }
delta = vectorNorm(vectorDifference, parameters.numberOfPages); delta = vectorNorm(vectorDifference, parameters.numberOfPages);
if (!iterations % 5) {
for (int i=0; i<parameters.numberOfPages; ++i) {
double temp = fabs((*pagerankVector)[i] - previousPagerankVector[i]) / fabs(previousPagerankVector[i]);
if (temp < parameters.convergenceCriterion){
convergedPagerankVector[i] = (*pagerankVector)[i];
for (int j=0; j<parameters.numberOfPages; ++j){
(*transitionMatrix)[i][j] = 0;
}
}
}
}
++iterations; ++iterations;
printf("Iteration %d: delta = %f\n", iterations, delta); printf("Iteration %d: delta = %f\n", iterations, delta);
} while (delta > parameters.convergenceCriterion && } while (delta > parameters.convergenceCriterion &&
@ -141,7 +158,8 @@ void generateNormalizedTransitionMatrix(double ***transitionMatrix,
* between a matrix and a vector in a cheap way. * between a matrix and a vector in a cheap way.
*/ */
void matrixVectorMultiplication(double ***matrix, double *vector, void matrixVectorMultiplication(double ***matrix, double *vector,
double **product, int vectorSize, double dampingFactor) { double *convergedPagerankVector, double **product, int vectorSize,
double dampingFactor) {
double webUniformProbability = 1. / vectorSize; double webUniformProbability = 1. / vectorSize;
for (int i=0; i<vectorSize; ++i) { for (int i=0; i<vectorSize; ++i) {
@ -150,7 +168,7 @@ void matrixVectorMultiplication(double ***matrix, double *vector,
for (int j=0; j<vectorSize; ++j) { for (int j=0; j<vectorSize; ++j) {
sum += (*matrix)[i][j] * vector[j]; sum += (*matrix)[i][j] * vector[j];
} }
(*product)[i] = dampingFactor * sum; (*product)[i] = dampingFactor * sum + convergedPagerankVector[i];
} }
double normDifference = vectorNorm(vector, vectorSize) - double normDifference = vectorNorm(vector, vectorSize) -

3
serial/serial_gs_pagerank_functions.h

@ -75,7 +75,8 @@ double vectorNorm(double *vector, int vectorSize);
// Function matrixVectorMultiplication calculates the product of the // Function matrixVectorMultiplication calculates the product of the
// multiplication between a matrix and a vector. // multiplication between a matrix and a vector.
void matrixVectorMultiplication(double ***matrix, double *vector, void matrixVectorMultiplication(double ***matrix, double *vector,
double **product, int vectorSize, double dampingFactor); double *convergedPagerankVector, double **product, int vectorSize,
double dampingFactor);
// Function pagerank iteratively calculates the pagerank of each page until // Function pagerank iteratively calculates the pagerank of each page until
// either the convergence criterion is met or the maximum number of iterations // either the convergence criterion is met or the maximum number of iterations

Loading…
Cancel
Save