@ -9,11 +9,12 @@ const char *ARGUMENT_OUTPUT_FILENAME = "-o";
// File-scope configuration constants. Each object is defined exactly once:
// a second definition with an initializer in the same translation unit is a
// redefinition error.

// Numeric base; presumably the radix used when parsing numeric command-line
// arguments -- TODO confirm against the argument parser.
const int NUMERICAL_BASE = 10;

// Output filename; presumably used when no "-o" argument is supplied --
// TODO confirm against the argument parser.
char *DEFAULT_OUTPUT_FILENAME = "pagerank_output";

// Size in bytes of the heap buffer that receives the remainder of a page's
// link line when reading the graph file.
const int MAX_PAGE_LINKS_TEXT_SIZE = 4096;

// Size in bytes of the stack buffer used for line-wise fgets() reads of the
// graph file header.
const int FILE_READ_BUFFER_SIZE = 4096;
// ==================== PAGERANK ====================
// ==================== PAGERANK ====================
int pagerank ( SparseMatrix * transitionMatrix , double * * pagerankVector , Parameters parameters ) {
int pagerank ( SparseMatrix * transitionMatrix , double * * pagerankVector ,
bool * convergenceStatus , Parameters parameters ) {
int iterations = 0 ;
int iterations = 0 ;
double delta ,
double delta ,
* vectorDifference = ( double * ) malloc ( parameters . numberOfPages * sizeof ( double ) ) ,
* vectorDifference = ( double * ) malloc ( parameters . numberOfPages * sizeof ( double ) ) ,
@ -22,6 +23,7 @@ int pagerank(SparseMatrix *transitionMatrix, double **pagerankVector, Parameters
* * linksFromConvergedPages = ( double * * ) malloc ( parameters . numberOfPages * sizeof ( double * ) ) ,
* * linksFromConvergedPages = ( double * * ) malloc ( parameters . numberOfPages * sizeof ( double * ) ) ,
* linksFromConvergedPagesPagerankVector = ( double * ) malloc ( parameters . numberOfPages * sizeof ( double ) ) ;
* linksFromConvergedPagesPagerankVector = ( double * ) malloc ( parameters . numberOfPages * sizeof ( double ) ) ;
bool * converganceMatrix = ( bool * ) malloc ( parameters . numberOfPages * sizeof ( bool ) ) ;
bool * converganceMatrix = ( bool * ) malloc ( parameters . numberOfPages * sizeof ( bool ) ) ;
* convergenceStatus = false ;
for ( int i = 0 ; i < parameters . numberOfPages ; + + i ) {
for ( int i = 0 ; i < parameters . numberOfPages ; + + i ) {
convergedPagerankVector [ i ] = 0 ;
convergedPagerankVector [ i ] = 0 ;
@ -35,7 +37,7 @@ int pagerank(SparseMatrix *transitionMatrix, double **pagerankVector, Parameters
}
}
if ( parameters . verbose ) {
if ( parameters . verbose ) {
printf ( " \n ----- Starting iterations ----- \n " ) ;
printf ( ANSI_COLOR_YELLOW " \n ----- Starting iterations ----- \n " ANSI_COLOR_RESET ) ;
}
}
do {
do {
@ -57,6 +59,9 @@ int pagerank(SparseMatrix *transitionMatrix, double **pagerankVector, Parameters
vectorDifference [ i ] = ( * pagerankVector ) [ i ] - previousPagerankVector [ i ] ;
vectorDifference [ i ] = ( * pagerankVector ) [ i ] - previousPagerankVector [ i ] ;
}
}
delta = vectorNorm ( vectorDifference , parameters . numberOfPages ) ;
delta = vectorNorm ( vectorDifference , parameters . numberOfPages ) ;
if ( delta < parameters . convergenceCriterion ) {
* convergenceStatus = true ;
}
if ( iterations & & ! iterations % 10 ) {
if ( iterations & & ! iterations % 10 ) {
for ( int i = 0 ; i < parameters . numberOfPages ; + + i ) {
for ( int i = 0 ; i < parameters . numberOfPages ; + + i ) {
@ -88,8 +93,12 @@ int pagerank(SparseMatrix *transitionMatrix, double **pagerankVector, Parameters
}
}
+ + iterations ;
+ + iterations ;
printf ( " Iteration %d: delta = %f \n " , iterations , delta ) ;
if ( iterations % 2 ) {
} while ( delta > parameters . convergenceCriterion & &
printf ( ANSI_COLOR_BLUE " Iteration %d: delta = %f \n " ANSI_COLOR_RESET , iterations , delta ) ;
} else {
printf ( ANSI_COLOR_CYAN " Iteration %d: delta = %f \n " ANSI_COLOR_RESET , iterations , delta ) ;
}
} while ( ! * convergenceStatus & &
( parameters . maxIterations = = 0 | | iterations < parameters . maxIterations ) ) ;
( parameters . maxIterations = = 0 | | iterations < parameters . maxIterations ) ) ;
if ( ! parameters . history ) {
if ( ! parameters . history ) {
@ -112,22 +121,22 @@ void initialize(SparseMatrix *transitionMatrix,
// Reads web graph from file
// Reads web graph from file
if ( ( * parameters ) . verbose ) {
if ( ( * parameters ) . verbose ) {
printf ( " ----- Reading graph from file ----- \n " ) ;
printf ( ANSI_COLOR_YELLOW " ----- Reading graph from file ----- \n " ANSI_COLOR_RESET ) ;
}
}
generateNormalizedTransitionMatrixFromFile ( transitionMatrix , parameters ) ;
generateNormalizedTransitionMatrixFromFile ( transitionMatrix , parameters ) ;
// Outputs the algorithm parameters to the console
// Outputs the algorithm parameters to the console
if ( ( * parameters ) . verbose ) {
if ( ( * parameters ) . verbose ) {
printf ( " \n ----- Running with parameters ----- \
printf ( ANSI_COLOR_YELLOW " \n ----- Running with parameters ----- \n " ANSI_COLOR_RESET \
\ nNumber of pages : % d " , (*parameters).numberOfPages) ;
" Number of pages: %d " , ( * parameters ) . numberOfPages ) ;
if ( ! ( * parameters ) . maxIterations ) {
if ( ! ( * parameters ) . maxIterations ) {
printf ( " \n Maximum number of iterations: inf " ) ;
printf ( " \n Maximum number of iterations: inf " ) ;
} else {
} else {
printf ( " \n Maximum number of iterations: %d " , ( * parameters ) . maxIterations ) ;
printf ( " \n Maximum number of iterations: %d " , ( * parameters ) . maxIterations ) ;
}
}
printf ( " \n Convergence criterion: %f \
printf ( " \n Convergence criterion: %f " \
\ nDamping factor : % f \
" \n Damping factor: %f " \
\ nGraph filename : % s \ n " , (*parameters).convergenceCriterion ,
" \n Graph filename: %s \n " , ( * parameters ) . convergenceCriterion ,
( * parameters ) . dampingFactor , ( * parameters ) . graphFilename ) ;
( * parameters ) . dampingFactor , ( * parameters ) . graphFilename ) ;
}
}
@ -266,37 +275,88 @@ void generateNormalizedTransitionMatrixFromFile(SparseMatrix *transitionMatrix,
exit ( EXIT_FAILURE ) ;
exit ( EXIT_FAILURE ) ;
}
}
int pageIndex , count = 0 ;
char buffer [ FILE_READ_BUFFER_SIZE ] ;
while ( fscanf ( graphFile , " %d: " , & pageIndex ) ! = EOF ) {
char * readResult ;
if ( ! ( pageIndex % 51050 ) ) {
// Skips the first two lines
printf ( " \t %d \t %d%% \n " , pageIndex , + + count ) ;
readResult = fgets ( buffer , FILE_READ_BUFFER_SIZE , graphFile ) ;
readResult = fgets ( buffer , FILE_READ_BUFFER_SIZE , graphFile ) ;
if ( readResult = = NULL ) {
printf ( " Error while reading from the file. Does the file have the correct format? \n " ) ;
exit ( EXIT_FAILURE ) ;
}
}
char * restOfLine = malloc ( MAX_PAGE_LINKS_TEXT_SIZE ) ;
// Third line contains the numbers of nodes and edges
if ( ! fgets ( restOfLine , MAX_PAGE_LINKS_TEXT_SIZE , graphFile ) ) {
int numberOfNodes = 0 , numberOfEdges ;
readResult = fgets ( buffer , FILE_READ_BUFFER_SIZE , graphFile ) ;
if ( readResult = = NULL ) {
printf ( " Error while reading from the file. Does the file have the correct format? \n " ) ;
exit ( EXIT_FAILURE ) ;
exit ( EXIT_FAILURE ) ;
}
}
char * token = strtok ( restOfLine , " " ) ;
// Parses the number of nodes and number of edges
{
// Splits string to whitespace
char * token = strtok ( buffer , " " ) ;
bool nextIsNodes = false , nextIsEdges = false ;
while ( token ! = NULL ) {
while ( token ! = NULL ) {
if ( strcmp ( token , " \n " ) = = 0 ) {
if ( strcmp ( token , " Nodes: " ) = = 0 ) {
//token = strtok (NULL, " ");
nextIsNodes = true ;
} else if ( nextIsNodes ) {
numberOfNodes = atoi ( token ) ;
nextIsNodes = false ;
} else if ( strcmp ( token , " Edges: " ) = = 0 ) {
nextIsEdges = true ;
} else if ( nextIsEdges ) {
numberOfEdges = atoi ( token ) ;
break ;
break ;
}
}
int outLink = atoi ( token ) ;
// Gets next string token
if ( outLink ! = - 1 ) {
token = strtok ( NULL , " ,.- " ) ;
apendElement ( transitionMatrix , 1 , pageIndex , outLink ) ;
}
}
if ( ( * parameters ) . verbose ) {
printf ( " The number of pages is: %d \n The number of edges is: %d \n " ,
numberOfNodes , numberOfEdges ) ;
}
( * parameters ) . numberOfPages = numberOfNodes ;
// Skips the fourth line
readResult = fgets ( buffer , 512 , graphFile ) ;
if ( readResult = = NULL ) {
printf ( " Error while reading from the file. Does the file have the correct format? \n " ) ;
exit ( EXIT_FAILURE ) ;
}
printf ( " SIZE OF STRUCT = %lu Bytes \n " , sizeof ( SparseMatrixElement ) ) ;
int fivePercentIncrements = ( int ) numberOfEdges / 20 ;
fivePercentIncrements = fivePercentIncrements ! = 0 ? fivePercentIncrements : 1 ;
for ( int i = 0 ; i < numberOfEdges ; i + + ) {
if ( ( ( * parameters ) . verbose ) & & ( ( i % fivePercentIncrements ) = = 0 ) ) {
int percentage = ( i / fivePercentIncrements ) * 5 ;
printf ( " %d%% done " , percentage ) ;
if ( percentage % 20 = = 0 ) {
printf ( " \n " ) ;
} else {
printf ( " •••• " ) ;
}
}
token = strtok ( NULL , " " ) ;
}
}
int fileFrom = 0 , fileTo = 0 ;
if ( ! fscanf ( graphFile , " %d %d " , & fileFrom , & fileTo ) ) {
break ;
}
}
printf ( " \t 100%% \n " ) ;
printf ( " number of edges = %d \n " , transitionMatrix - > elements ) ;
( * parameters ) . numberOfPages = pageIndex + 1 ;
apendElement ( transitionMatrix , 1 , fileFrom , fileTo ) ;
}
// Calculates the outdegree of each page and assigns the uniform probability
// of transition to the elements of the corresponding row
int currentRow = transitionMatrix - > firstElement - > rowIndex ;
int currentRow = transitionMatrix - > firstElement - > rowIndex ;
SparseMatrixElement * startElement = transitionMatrix - > firstElement ;
SparseMatrixElement * startElement = transitionMatrix - > firstElement ;
while ( true ) {
while ( true ) {
@ -315,9 +375,10 @@ void generateNormalizedTransitionMatrixFromFile(SparseMatrix *transitionMatrix,
// Assigns the value 1/outdegree to current page's columns
// Assigns the value 1/outdegree to current page's columns
currentElement = startElement ;
currentElement = startElement ;
double pageUniformProbability = 1. / pageOutdegree ;
for ( int i = 0 ; i < pageOutdegree ; + + i ) {
for ( int i = 0 ; i < pageOutdegree ; + + i ) {
if ( currentElement - > rowIndex = = currentRow ) {
if ( currentElement - > rowIndex = = currentRow ) {
currentElement - > value = 1. / pageOutdegree ;
currentElement - > value = pageUniformProbability ;
currentElement = currentElement - > nextElement ;
currentElement = currentElement - > nextElement ;
} else {
} else {
break ;
break ;
@ -341,18 +402,18 @@ void generateNormalizedTransitionMatrixFromFile(SparseMatrix *transitionMatrix,
* correct ( valid ) way to use the program .
* correct ( valid ) way to use the program .
*/
*/
void validUsage(char *programName) {
    // Prints the command synopsis followed by one entry per option to
    // standard output, then terminates the process with EXIT_FAILURE; this
    // function never returns to its caller.
    //
    // programName is substituted for the leading %s of the synopsis
    // (presumably argv[0] -- confirm against the caller).
    //
    // The help text is built from adjacent string literals, which the
    // compiler concatenates. A backslash-newline inside a single literal
    // would instead splice the next source line's leading whitespace into
    // the output.
    printf("%s [-c convergence_criterion] [-m max_iterations] [-a alpha] [-v] [-h] [-o output_filename] <graph_file>"
           "\n-c convergence_criterion"
           "\n\tthe convergence tolerance criterion"
           "\n-m max_iterations"
           "\n\tmaximum number of iterations to perform"
           "\n-a alpha"
           "\n\tthe damping factor"
           "\n-v enable verbal output"
           "\n-h enable history output to file"
           "\n-o output_filename"
           "\n\tfilename and path for the output"
           "\n",
           programName);

    exit(EXIT_FAILURE);
}