commit 7d9e806d75131c8be580d0981d700abf030d6297 Author: Apostolof Date: Thu Apr 2 21:40:50 2020 +0300 Init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9fba62f --- /dev/null +++ b/.gitignore @@ -0,0 +1,131 @@ +### JetBrains template +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm + +*.iml + +## Directory-based project format: +.idea/ +/out/ + + +### NetBeans template +nbproject/private/ +build/ +nbbuild/ +dist/ +nbdist/ +nbactions.xml +nb-configuration.xml +.nb-gradle/ + + +### Eclipse template +*.pydevproject +.metadata +.gradle +bin/ +tmp/ +*.tmp +*.bak +*.swp +*~.nib +local.properties +.settings/ +.loadpath + +# Eclipse Core +.project + +# External tool builders +.externalToolBuilders/ + +# Locally stored "Eclipse launch configurations" +*.launch + +# CDT-specific +.cproject + +# JDT-specific (Eclipse Java Development Tools) +.classpath + +# PDT-specific +.buildpath + +# sbteclipse plugin +.target + +# TeXlipse plugin +.texlipse + + +### Maven template +target/ +pom.xml.tag +pom.xml.releaseBackup +pom.xml.versionsBackup +pom.xml.next +release.properties +dependency-reduced-pom.xml +buildNumber.properties +.mvn/timing.properties + + +### Linux template +*~ + +# KDE directory preferences +.directory + +# Linux trash folder which might appear on any partition or disk +.Trash-* + + +### Windows template +# Windows image file caches +Thumbs.db +ehthumbs.db + +# Folder config file +Desktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msm +*.msp + +# Windows shortcuts +*.lnk + + +### OSX template +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items 
+.apdisk + +/secrets +/dump* diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..cd4f6d8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,10 @@ +FROM openjdk:8-jre-alpine as build +WORKDIR /workspace/app + +WORKDIR /topic-starters-app +COPY ./api/target/topicstarters-api.jar app.jar +COPY ./run.sh run.sh + +RUN chmod +x run.sh + +ENTRYPOINT ["./run.sh"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..16e00b4 --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +build: + @docker-compose -p topicstarters build; +run: + @docker-compose -p topicstarters up -d +stop: + @docker-compose -p topicstarters down +clean-data: + @docker-compose -p topicstarters down -v +clean-images: + @docker rmi `docker images -q -f "dangling=true"` diff --git a/README.md b/README.md new file mode 100644 index 0000000..e4eef8a --- /dev/null +++ b/README.md @@ -0,0 +1,107 @@ +# Thmmy Topic Starters +> A service that parses all the topics of thmmy.gr into a database and exposes an endpoint for getting filtered pages of them + +Thmmy topic starters is an application that crawls all thmmy.gr boards every day at 2 a.m. The parsed information is then saved in a +postgres database and can be accessed through the endpoint `/api/topicstarters`. + +--- + +# API endpoint + +## View topic starters + +``` +GET /api/topicstarters +``` + +### Parameters + +| Name | Type | Description | +| ----- | ------ | ----------- | +| user | String | **Optional**. The username or ID of the user. Filters the results by user. | +| board | String | **Optional**. The title or ID of the board. Filters the results by board. | +| topic | String | **Optional**. The subject or ID of the topic. Filters the results by topic. 
| + +#### Example + +```shell script +curl --location \ + --request GET 'localhost:8080/api/topicstarters' \ + --form 'user=14670' \ + --form 'board=Ανακοινώσεις και Έκτακτα νέα' \ + --form 'topic=68000' +``` + +### Response + +``` +Status: 200 OK +Content-Type: application/json;charset=UTF-8 +Content-Length: 962 +Content-Encoding: gzip +``` +```json +{ + "content": [ + { + "id": "d806599f-ae77-4780-bd3d-510943588054", + "topicId": 68000, + "topicUrl": "https://www.thmmy.gr/smf/index.php?topic=68000.0", + "starterUsername": "Apostolof", + "starterUrl": "https://www.thmmy.gr/smf/index.php?action=profile;u=14670", + "starterId": 14670, + "boardTitle": "Ανακοινώσεις και Έκτακτα νέα", + "boardUrl": "https://www.thmmy.gr/smf/index.php?board=25.0", + "boardId": 25, + "topicSubject": "mTHMMY (alpha version)", + "numberOfReplies": 175, + "numberOfViews": 15729 + } + ], + "pageable": { + [...] + }, + [...] +} +``` + +\* part of the response truncated for brevity + +--- + +# Build docker image + +To build the docker image you first need to build the java application for production: +```shell script +mvn clean install package +``` + +Define a username, password and database name for the postgres database in the file `./env/topic_starters_postgres.env`. +An example of what this file might look like is given in `./env/topic_starters_postgres.example.env`. + +If you want to get all the topics accessible by a logged-in user (rather than just those publicly available to guests) you also need to create two more files containing the username and password of a user for the application to use. 
+* `./secrets/username`: which should contain the username +* `./secrets/password`: which should contain the password + +Then just use the Makefile to handle the build: +```shell script +make build +``` + +Run the image using: +```shell script +make run +``` + +Stop the container using: +```shell script +make stop +``` + +The Makefile also provides targets for cleaning the data and dangling images. + +--- + +## License + +[![Beerware License](https://img.shields.io/badge/license-beerware%20%F0%9F%8D%BA-blue.svg)](https://gitlab.com/Apostolof/flavours-without-borders/blob/master/LICENSE.md) \ No newline at end of file diff --git a/api/pom.xml b/api/pom.xml new file mode 100644 index 0000000..9ae9c6c --- /dev/null +++ b/api/pom.xml @@ -0,0 +1,103 @@ + + + + topicstarter + gr.thmmy.mthmmy.topicstarter + 1.0.0 + + 4.0.0 + + api + + + gr.thmmy.mthmmy.topicstarter.api.TopicStarterApplication + + + + + + gr.thmmy.mthmmy.topicstarter + entity + + + gr.thmmy.mthmmy.topicstarter + repository + + + gr.thmmy.mthmmy.topicstarter + scheduled + compile + + + gr.thmmy.mthmmy.topicstarter + service + + + + org.springframework.boot + spring-boot-configuration-processor + true + + + + + org.springframework.boot + spring-boot-starter-web + + + + + org.projectlombok + lombok + provided + + + + + org.postgresql + postgresql + runtime + + + + + + + org.springframework.boot + spring-boot-maven-plugin + + topicstarters + api + true + + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + verify + + jar + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + verify + + jar + + + + + + + diff --git a/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/TopicStarterApiConfig.java b/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/TopicStarterApiConfig.java new file mode 100644 index 0000000..fb7dd83 --- /dev/null +++ b/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/TopicStarterApiConfig.java @@ -0,0 +1,18 @@ +package 
gr.thmmy.mthmmy.topicstarter.api; + +import gr.thmmy.mthmmy.topicstarter.TopicStarterRepositoryConfig; +import gr.thmmy.mthmmy.topicstarter.entity.TopicStarterEntityConfiguration; +import gr.thmmy.mthmmy.topicstarter.scheduled.TopicStarterSchedulesConfig; +import gr.thmmy.mthmmy.topicstarter.service.TopicStarterServiceConfiguration; +import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; + +@Configuration +@Import({ + TopicStarterServiceConfiguration.class, + TopicStarterRepositoryConfig.class, + TopicStarterEntityConfiguration.class, + TopicStarterSchedulesConfig.class +}) +public class TopicStarterApiConfig { +} diff --git a/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/TopicStarterApplication.java b/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/TopicStarterApplication.java new file mode 100644 index 0000000..684f9d3 --- /dev/null +++ b/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/TopicStarterApplication.java @@ -0,0 +1,19 @@ +package gr.thmmy.mthmmy.topicstarter.api; + +import org.springframework.boot.SpringApplication; +import org.springframework.boot.autoconfigure.SpringBootApplication; +import org.springframework.boot.builder.SpringApplicationBuilder; +import org.springframework.boot.web.servlet.support.SpringBootServletInitializer; + +@SpringBootApplication +public class TopicStarterApplication extends SpringBootServletInitializer { + + public static void main(final String... 
args) { + SpringApplication.run(TopicStarterApplication.class, args); + } + + @Override + protected SpringApplicationBuilder configure(final SpringApplicationBuilder application) { + return application.sources(TopicStarterApplication.class); + } +} diff --git a/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/controller/topic/starter/TopicStarterController.java b/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/controller/topic/starter/TopicStarterController.java new file mode 100644 index 0000000..888090b --- /dev/null +++ b/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/controller/topic/starter/TopicStarterController.java @@ -0,0 +1,14 @@ +package gr.thmmy.mthmmy.topicstarter.api.controller.topic.starter; + +import gr.thmmy.mthmmy.topicstarter.entity.topic.TopicStarter; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.Pageable; +import org.springframework.http.ResponseEntity; + +public interface TopicStarterController { + + ResponseEntity> topics(String user, + String board, + String topic, + Pageable pageable); +} diff --git a/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/controller/topic/starter/TopicStarterControllerImpl.java b/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/controller/topic/starter/TopicStarterControllerImpl.java new file mode 100644 index 0000000..84572f0 --- /dev/null +++ b/api/src/main/java/gr/thmmy/mthmmy/topicstarter/api/controller/topic/starter/TopicStarterControllerImpl.java @@ -0,0 +1,36 @@ +package gr.thmmy.mthmmy.topicstarter.api.controller.topic.starter; + +import gr.thmmy.mthmmy.topicstarter.entity.topic.TopicStarter; +import gr.thmmy.mthmmy.topicstarter.service.topic.starter.TopicStarterService; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.Pageable; +import org.springframework.http.ResponseEntity; +import org.springframework.web.bind.annotation.GetMapping; +import 
org.springframework.web.bind.annotation.RequestMapping; +import org.springframework.web.bind.annotation.RequestParam; +import org.springframework.web.bind.annotation.RestController; + +@RestController +@RequestMapping("/api") +@RequiredArgsConstructor +@Slf4j +public class TopicStarterControllerImpl implements TopicStarterController { + + private final TopicStarterService topicStarterService; + + @Override + @GetMapping("/topicstarters") + public ResponseEntity> topics(@RequestParam(required = false) String user, + @RequestParam(required = false) String board, + @RequestParam(required = false) String topic, + final Pageable pageable) { + + return topicStarterService + .getWithFilters(user, board, topic, pageable) + .onFailure(throwable -> log.error("An error has occurred while processing a GET request", throwable)) + .map(ResponseEntity::ok) + .get(); + } +} diff --git a/api/src/main/resources/application-dev.yml b/api/src/main/resources/application-dev.yml new file mode 100644 index 0000000..90d1a07 --- /dev/null +++ b/api/src/main/resources/application-dev.yml @@ -0,0 +1,6 @@ +spring: + datasource: + driver-class-name: org.postgresql.Driver + url: jdbc:postgresql://localhost:5432/${POSTGRES_DB}?ApplicationName=topic-starters + username: ${POSTGRES_USER} + password: ${POSTGRES_PASSWORD} diff --git a/api/src/main/resources/application.yml b/api/src/main/resources/application.yml new file mode 100644 index 0000000..7e4e740 --- /dev/null +++ b/api/src/main/resources/application.yml @@ -0,0 +1,42 @@ +server: + port: 8080 + compression: + enabled: true + min-response-size: 1024 + mime-types: application/json,application/xml,text/plain + +spring: + datasource: + driver-class-name: org.postgresql.Driver + url: jdbc:postgresql://topic-starters-postgres:5432/${POSTGRES_DB}?ApplicationName=topic-starters + username: ${POSTGRES_USER} + password: ${POSTGRES_PASSWORD} + sql-script-encoding: UTF-8 + initialization-mode: never + + jpa: + hibernate: + ddl-auto: validate + naming: 
+ implicit-strategy: org.hibernate.boot.model.naming.ImplicitNamingStrategyComponentPathImpl + + properties: + hibernate: + format_sql: true + show_sql: false + jdbc: + lob: + non_contextual_creation: true + + flyway: + enabled: true + + servlet: + multipart: + enabled: true + file-size-threshold: 2KB + max-file-size: 128MB + max-request-size: 256MB + + config: + additional-location: classpath:/config/development/ diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..530da24 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,42 @@ +version: '3.2' + +services: + topic_starters_postgres_data: + image: postgres:10.7 + container_name: topic-starters-postgres + expose: + - "5432" + volumes: + - 'topic_starters_postgres_data:/var/lib/postgresql/data' + env_file: + - env/topic_starters_postgres.env + ports: + - "5432:5432" + networks: + - topic-starters-net + restart: on-failure + + topic_starters: + build: ./ + container_name: topic-starters-service + ports: + - "8080:8080" + env_file: + - ./env/topic_starters_postgres.env + secrets: + - topic_starters_username + - topic_starters_password + networks: + - topic-starters-net + restart: on-failure + +volumes: + topic_starters_postgres_data: +networks: + topic-starters-net: + driver: bridge +secrets: + topic_starters_username: + file: ./secrets/username + topic_starters_password: + file: ./secrets/password diff --git a/entity/pom.xml b/entity/pom.xml new file mode 100644 index 0000000..c9dae27 --- /dev/null +++ b/entity/pom.xml @@ -0,0 +1,83 @@ + + + + topicstarter + gr.thmmy.mthmmy.topicstarter + 1.0.0 + + 4.0.0 + + entity + + + + + org.springframework.boot + spring-boot-starter-data-jpa + + + com.querydsl + querydsl-jpa + + + + + org.flywaydb + flyway-core + + + + + org.projectlombok + lombok + provided + + + + + + + com.mysema.maven + apt-maven-plugin + + + + process + + + target/generated-sources/java + com.querydsl.apt.jpa.JPAAnnotationProcessor + + + + + + org.apache.maven.plugins + 
maven-source-plugin + + + attach-sources + verify + + jar + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + verify + + jar + + + + + + + diff --git a/entity/src/main/java/gr/thmmy/mthmmy/topicstarter/entity/AbstractEntity.java b/entity/src/main/java/gr/thmmy/mthmmy/topicstarter/entity/AbstractEntity.java new file mode 100644 index 0000000..dbb06a8 --- /dev/null +++ b/entity/src/main/java/gr/thmmy/mthmmy/topicstarter/entity/AbstractEntity.java @@ -0,0 +1,23 @@ +package gr.thmmy.mthmmy.topicstarter.entity; + +import lombok.Data; + +import javax.persistence.Column; +import javax.persistence.Id; +import javax.persistence.MappedSuperclass; +import javax.persistence.PrePersist; +import java.util.UUID; + +@Data +@MappedSuperclass +public abstract class AbstractEntity { + + @Id + @Column(columnDefinition = "UUID") + protected String id; + + @PrePersist + protected void onCreate() { + this.id = UUID.randomUUID().toString(); + } +} diff --git a/entity/src/main/java/gr/thmmy/mthmmy/topicstarter/entity/TopicStarterEntityConfiguration.java b/entity/src/main/java/gr/thmmy/mthmmy/topicstarter/entity/TopicStarterEntityConfiguration.java new file mode 100644 index 0000000..a8c67f6 --- /dev/null +++ b/entity/src/main/java/gr/thmmy/mthmmy/topicstarter/entity/TopicStarterEntityConfiguration.java @@ -0,0 +1,9 @@ +package gr.thmmy.mthmmy.topicstarter.entity; + +import org.springframework.boot.autoconfigure.domain.EntityScan; +import org.springframework.context.annotation.Configuration; + +@Configuration +@EntityScan +public class TopicStarterEntityConfiguration { +} diff --git a/entity/src/main/java/gr/thmmy/mthmmy/topicstarter/entity/topic/TopicStarter.java b/entity/src/main/java/gr/thmmy/mthmmy/topicstarter/entity/topic/TopicStarter.java new file mode 100644 index 0000000..8cf4cde --- /dev/null +++ b/entity/src/main/java/gr/thmmy/mthmmy/topicstarter/entity/topic/TopicStarter.java @@ -0,0 +1,47 @@ +package gr.thmmy.mthmmy.topicstarter.entity.topic; + 
+import gr.thmmy.mthmmy.topicstarter.entity.AbstractEntity; +import lombok.Data; +import lombok.EqualsAndHashCode; + +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.Table; + +@Data +@EqualsAndHashCode(callSuper = true) +@Entity +@Table(name = "topic_starter") +public class TopicStarter extends AbstractEntity { + + @Column(nullable = false) + private Long topicId; + + @Column(nullable = false) + private String topicUrl; + + @Column(nullable = false) + private String starterUsername; + + private String starterUrl; + + private Long starterId; + + @Column(nullable = false) + private String boardTitle; + + @Column(nullable = false) + private String boardUrl; + + @Column(nullable = false) + private Long boardId; + + @Column(nullable = false) + private String topicSubject; + + @Column(nullable = false) + private Long numberOfReplies; + + @Column(nullable = false) + private Long numberOfViews; +} diff --git a/entity/src/main/resources/db/migration/V1_00__create_table_topic_starter.sql b/entity/src/main/resources/db/migration/V1_00__create_table_topic_starter.sql new file mode 100644 index 0000000..5947a3a --- /dev/null +++ b/entity/src/main/resources/db/migration/V1_00__create_table_topic_starter.sql @@ -0,0 +1,16 @@ +create table topic_starter +( + id varchar(255) not null + constraint topic_starter_id_pkey primary key, + topic_id bigint not null, + topic_url varchar(255) not null, + starter_username varchar(255) not null, + starter_url varchar(255) not null, + starter_id bigint not null, + board_title varchar(255) not null, + board_url varchar(255) not null, + board_id bigint not null, + topic_subject varchar(255) not null, + number_of_replies bigint not null, + number_of_views bigint not null +); diff --git a/env/topic_starters_postgres.example.env b/env/topic_starters_postgres.example.env new file mode 100644 index 0000000..0f2220b --- /dev/null +++ b/env/topic_starters_postgres.example.env @@ -0,0 +1,3 @@ 
+POSTGRES_USER=postgres +POSTGRES_PASSWORD=postgres +POSTGRES_DB=topic-starters diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..fc81e5e --- /dev/null +++ b/pom.xml @@ -0,0 +1,201 @@ + + + 4.0.0 + + gr.thmmy.mthmmy.topicstarter + topicstarter + 1.0.0 + pom + + + org.springframework.boot + spring-boot-starter-parent + 2.1.6.RELEASE + + + + api + service + entity + repository + scheduled + + + + + + + UTF-8 + UTF-8 + + 1.8 + + 3.0.1 + 3.0.1 + 3.5.1 + 1.1.3 + + 1.0.0-alpha-3 + 1.18.8 + 1.12.1 + 3.9.0 + 2.9.0 + + + + + + + gr.thmmy.mthmmy.topicstarter + api + ${project.version} + + + gr.thmmy.mthmmy.topicstarter + entity + ${project.version} + + + gr.thmmy.mthmmy.topicstarter + repository + ${project.version} + + + gr.thmmy.mthmmy.topicstarter + scheduled + ${project.version} + + + gr.thmmy.mthmmy.topicstarter + service + ${project.version} + + + + + io.vavr + vavr + ${vavr.version} + + + org.projectlombok + lombok + ${lombok.version} + + + org.jsoup + jsoup + ${jsoup.version} + + + com.squareup.okhttp3 + okhttp + ${okhttp.version} + + + + + com.querydsl + querydsl-jpa + ${querydsl.version} + + + + + com.fasterxml.jackson.core + jackson-annotations + ${jackson.version} + + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + ${jackson.version} + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven-compiler-plugin.version} + + + com.mysema.maven + apt-maven-plugin + ${apt-maven-plugin.version} + + + com.querydsl + querydsl-apt + ${querydsl.version} + + + + + org.apache.maven.plugins + maven-source-plugin + ${maven-source-plugin.version} + + + org.apache.maven.plugins + maven-javadoc-plugin + ${maven-javadoc-plugin.version} + + + + + + + + spring-milestones + Spring Milestones + https://repo.spring.io/milestone + + false + + + + spring-snapshots + Spring Snapshots + https://repo.spring.io/snapshot + + + + + + jitpack.io + https://jitpack.io + + + + + + + false + + jcenter-central + bintray-plugins + 
https://jcenter.bintray.com + + + + spring-milestones + Spring Milestones + https://repo.spring.io/milestone + + false + + + + spring-snapshots + Spring Snapshots + https://repo.spring.io/snapshot + + + + + diff --git a/repository/pom.xml b/repository/pom.xml new file mode 100644 index 0000000..a17f87a --- /dev/null +++ b/repository/pom.xml @@ -0,0 +1,74 @@ + + + + topicstarter + gr.thmmy.mthmmy.topicstarter + 1.0.0 + + 4.0.0 + + repository + + + + + gr.thmmy.mthmmy.topicstarter + entity + + + + + com.querydsl + querydsl-apt + provided + + + com.querydsl + querydsl-jpa + + + + + + org.projectlombok + lombok + provided + + + io.vavr + vavr + + + + + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + verify + + jar + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + verify + + jar + + + + + + + \ No newline at end of file diff --git a/repository/src/main/java/gr/thmmy/mthmmy/topicstarter/GenericRepository.java b/repository/src/main/java/gr/thmmy/mthmmy/topicstarter/GenericRepository.java new file mode 100644 index 0000000..50390ec --- /dev/null +++ b/repository/src/main/java/gr/thmmy/mthmmy/topicstarter/GenericRepository.java @@ -0,0 +1,11 @@ +package gr.thmmy.mthmmy.topicstarter; + +import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.querydsl.QuerydslPredicateExecutor; +import org.springframework.data.repository.NoRepositoryBean; + +import java.util.UUID; + +@NoRepositoryBean +public interface GenericRepository extends JpaRepository, QuerydslPredicateExecutor { +} diff --git a/repository/src/main/java/gr/thmmy/mthmmy/topicstarter/TopicStarterRepositoryConfig.java b/repository/src/main/java/gr/thmmy/mthmmy/topicstarter/TopicStarterRepositoryConfig.java new file mode 100644 index 0000000..f542af8 --- /dev/null +++ b/repository/src/main/java/gr/thmmy/mthmmy/topicstarter/TopicStarterRepositoryConfig.java @@ -0,0 +1,11 @@ +package gr.thmmy.mthmmy.topicstarter; + +import 
org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Configuration; +import org.springframework.data.jpa.repository.config.EnableJpaRepositories; + +@Configuration +@ComponentScan +@EnableJpaRepositories +public class TopicStarterRepositoryConfig { +} diff --git a/repository/src/main/java/gr/thmmy/mthmmy/topicstarter/repository/topic/TopicStarterRepository.java b/repository/src/main/java/gr/thmmy/mthmmy/topicstarter/repository/topic/TopicStarterRepository.java new file mode 100644 index 0000000..ec0b56c --- /dev/null +++ b/repository/src/main/java/gr/thmmy/mthmmy/topicstarter/repository/topic/TopicStarterRepository.java @@ -0,0 +1,9 @@ +package gr.thmmy.mthmmy.topicstarter.repository.topic; + +import gr.thmmy.mthmmy.topicstarter.GenericRepository; +import gr.thmmy.mthmmy.topicstarter.entity.topic.TopicStarter; +import org.springframework.stereotype.Repository; + +@Repository +public interface TopicStarterRepository extends GenericRepository { +} diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..ed3f572 --- /dev/null +++ b/run.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +topic_starters_username=$(cat /run/secrets/topic_starters_username) +topic_starters_password=$(cat /run/secrets/topic_starters_password) + +java -DTOPIC_STARTERS_USERNAME="$topic_starters_username" \ + -DTOPIC_STARTERS_PASSWORD="$topic_starters_password" \ + -jar app.jar gr.thmmy.mthmmy.topicstarter.api.TopicStarterApplication diff --git a/scheduled/pom.xml b/scheduled/pom.xml new file mode 100644 index 0000000..baebd36 --- /dev/null +++ b/scheduled/pom.xml @@ -0,0 +1,89 @@ + + + 4.0.0 + + + topicstarter + gr.thmmy.mthmmy.topicstarter + 1.0.0 + + + scheduled + + + + + + + + + org.springframework.boot + spring-boot-starter + + + org.springframework.boot + spring-boot-configuration-processor + true + + + + + + org.projectlombok + lombok + provided + + + io.vavr + vavr + + + + + + com.github.spullara.mustache.java + compiler + 0.9.5 + + + 
gr.thmmy.mthmmy.topicstarter + entity + + + gr.thmmy.mthmmy.topicstarter + service + + + + + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + verify + + jar + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + verify + + jar + + + + + + + \ No newline at end of file diff --git a/scheduled/src/main/java/gr/thmmy/mthmmy/topicstarter/scheduled/TopicStarterScheduled.java b/scheduled/src/main/java/gr/thmmy/mthmmy/topicstarter/scheduled/TopicStarterScheduled.java new file mode 100644 index 0000000..0d81dbc --- /dev/null +++ b/scheduled/src/main/java/gr/thmmy/mthmmy/topicstarter/scheduled/TopicStarterScheduled.java @@ -0,0 +1,36 @@ +package gr.thmmy.mthmmy.topicstarter.scheduled; + +import gr.thmmy.mthmmy.topicstarter.service.topic.starter.parser.TopicStarterParserService; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Component; + +import javax.annotation.PostConstruct; + +@Data +@Slf4j +@Component +public class TopicStarterScheduled { + + private final TopicStarterParserService topicStarterParserService; + + // Runs at 02:00am every day of every month + @Scheduled(cron = "0 0 02 * * *") + public void run() throws Exception { + + topicStarterParserService + .parseTopicStarters() + .onFailure(throwable -> log.error("An error has occurred while processing a GET request", throwable)) + .get(); + } + + @PostConstruct + public void init() { + + topicStarterParserService + .parseTopicStarters() + .onFailure(throwable -> log.error("An error has occurred while processing a GET request", throwable)) + .get(); + } +} diff --git a/scheduled/src/main/java/gr/thmmy/mthmmy/topicstarter/scheduled/TopicStarterSchedulesConfig.java b/scheduled/src/main/java/gr/thmmy/mthmmy/topicstarter/scheduled/TopicStarterSchedulesConfig.java new file mode 100644 index 0000000..ed76326 --- /dev/null +++ 
b/scheduled/src/main/java/gr/thmmy/mthmmy/topicstarter/scheduled/TopicStarterSchedulesConfig.java @@ -0,0 +1,11 @@ +package gr.thmmy.mthmmy.topicstarter.scheduled; + +import org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Configuration; +import org.springframework.scheduling.annotation.EnableScheduling; + +@Configuration +@ComponentScan +@EnableScheduling +public class TopicStarterSchedulesConfig { +} diff --git a/service/pom.xml b/service/pom.xml new file mode 100644 index 0000000..db78a75 --- /dev/null +++ b/service/pom.xml @@ -0,0 +1,93 @@ + + + 4.0.0 + + + topicstarter + gr.thmmy.mthmmy.topicstarter + 1.0.0 + + + service + + + + + gr.thmmy.mthmmy.topicstarter + entity + + + gr.thmmy.mthmmy.topicstarter + repository + + + + + org.springframework.boot + spring-boot-starter + + + org.springframework.boot + spring-boot-configuration-processor + true + + + + + org.projectlombok + lombok + provided + + + io.vavr + vavr + + + org.jsoup + jsoup + + + com.squareup.okhttp3 + okhttp + + + + + com.github.spullara.mustache.java + compiler + 0.9.5 + + + + + + + org.apache.maven.plugins + maven-source-plugin + + + attach-sources + verify + + jar + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + + + attach-javadocs + verify + + jar + + + + + + + diff --git a/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/TopicStarterServiceConfiguration.java b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/TopicStarterServiceConfiguration.java new file mode 100644 index 0000000..4f7054f --- /dev/null +++ b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/TopicStarterServiceConfiguration.java @@ -0,0 +1,46 @@ +package gr.thmmy.mthmmy.topicstarter.service; + +import io.vavr.control.Try; +import okhttp3.Cookie; +import okhttp3.CookieJar; +import okhttp3.HttpUrl; +import okhttp3.OkHttpClient; +import org.springframework.context.annotation.Bean; +import 
org.springframework.context.annotation.ComponentScan; +import org.springframework.context.annotation.Configuration; +import org.springframework.lang.NonNull; + +import java.util.ArrayList; +import java.util.concurrent.TimeUnit; + +@Configuration +@ComponentScan +public class TopicStarterServiceConfiguration { + + @Bean + public OkHttpClient getClient() { + + return Try + .of(() -> new CookieJar() { + private final java.util.List cookieStore = new ArrayList<>(); + + @Override + public void saveFromResponse(@NonNull HttpUrl url, @NonNull java.util.List cookies) { + cookieStore.addAll(cookies); + } + + @Override + public java.util.List loadForRequest(@NonNull HttpUrl url) { + return cookieStore; + } + } + ).map(cookieJar -> new OkHttpClient.Builder() + .cookieJar(cookieJar) + .connectTimeout(30, TimeUnit.SECONDS) + .writeTimeout(30, TimeUnit.SECONDS) + .readTimeout(30, TimeUnit.SECONDS) + .retryOnConnectionFailure(true) + .build()) + .get(); + } +} diff --git a/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/TopicStarterService.java b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/TopicStarterService.java new file mode 100644 index 0000000..ff10064 --- /dev/null +++ b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/TopicStarterService.java @@ -0,0 +1,14 @@ +package gr.thmmy.mthmmy.topicstarter.service.topic.starter; + +import gr.thmmy.mthmmy.topicstarter.entity.topic.TopicStarter; +import io.vavr.control.Try; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.Pageable; + +public interface TopicStarterService { + + Try> getWithFilters(String user, + String board, + String topic, + Pageable pageable); +} diff --git a/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/TopicStarterServiceImpl.java b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/TopicStarterServiceImpl.java new file mode 100644 index 
0000000..7e54fd1
--- /dev/null
+++ b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/TopicStarterServiceImpl.java
@@ -0,0 +1,116 @@
+package gr.thmmy.mthmmy.topicstarter.service.topic.starter;
+
+import com.querydsl.core.types.dsl.BooleanExpression;
+import com.querydsl.core.types.dsl.Expressions;
+import gr.thmmy.mthmmy.topicstarter.entity.topic.QTopicStarter;
+import gr.thmmy.mthmmy.topicstarter.entity.topic.TopicStarter;
+import gr.thmmy.mthmmy.topicstarter.repository.topic.TopicStarterRepository;
+import io.vavr.control.Option;
+import io.vavr.control.Try;
+import lombok.Data;
+import lombok.RequiredArgsConstructor;
+import org.springframework.data.domain.Page;
+import org.springframework.data.domain.Pageable;
+import org.springframework.stereotype.Service;
+
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import java.util.function.Function;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Querydsl-backed implementation of {@link TopicStarterService}.
+ */
+@RequiredArgsConstructor
+@Data
+@Service
+public class TopicStarterServiceImpl implements TopicStarterService {
+
+    private final TopicStarterRepository topicStarterRepository;
+
+    /**
+     * Returns one page of topic starters matching all the given filters.
+     * <p>
+     * A filter value that parses as a {@code long} is compared for equality with the
+     * corresponding ID column; any other value is matched with {@code like} against the
+     * corresponding text column. A {@code null} filter is ignored.
+     *
+     * @param user     starter ID or username, may be {@code null}
+     * @param board    board ID or title, may be {@code null}
+     * @param topic    topic ID or subject, may be {@code null}
+     * @param pageable the requested page, must not be {@code null}
+     * @return the page, or a failure if the predicate could not be built or the query failed
+     * @throws NullPointerException if {@code pageable} is {@code null}
+     */
+    @Override
+    public Try<Page<TopicStarter>> getWithFilters(final @Nullable String user,
+                                                  final @Nullable String board,
+                                                  final @Nullable String topic,
+                                                  final Pageable pageable) {
+        requireNonNull(pageable, "pageable is null");
+
+        return Try
+                // Always-true seed so the filters can simply be and-ed onto it
+                .of(() -> Expressions.asBoolean(true).isTrue())
+                .flatMap(predicate -> applyFilter(predicate, user,
+                        this::getUserIdPredicate, this::getUsernamePredicate))
+                .flatMap(predicate -> applyFilter(predicate, board,
+                        this::getBoardIdPredicate, this::getBoardTitlePredicate))
+                .flatMap(predicate -> applyFilter(predicate, topic,
+                        this::getTopicIdPredicate, this::getTopicSubjectPredicate))
+                .map(predicate -> topicStarterRepository.findAll(predicate, pageable));
+    }
+
+    /**
+     * And-s one optional filter onto {@code predicate}.
+     *
+     * @param predicate the predicate built so far
+     * @param filter    the raw filter value, {@code null} leaves the predicate unchanged
+     * @param byId      builds the predicate used when the value is numeric
+     * @param byText    builds the predicate used otherwise
+     * @return the extended predicate
+     */
+    private Try<BooleanExpression> applyFilter(final BooleanExpression predicate,
+                                               final @Nullable String filter,
+                                               final Function<Long, Try<BooleanExpression>> byId,
+                                               final Function<String, Try<BooleanExpression>> byText) {
+        if (filter == null) {
+            return Try.success(predicate);
+        }
+
+        return extractId(filter)
+                .toTry() // an empty Option becomes a failure, which selects the text predicate
+                .flatMap(byId)
+                .orElse(() -> byText.apply(filter)) // built only when actually needed
+                .map(predicate::and);
+    }
+
+    /**
+     * Parses the input as a numeric ID.
+     *
+     * @param input the raw filter value
+     * @return the ID, or an empty {@link Option} when the input is not a valid {@code long}
+     */
+    private Option<Long> extractId(final @Nonnull String input) {
+
+        // A failed parse maps to None; no null ID ever reaches the @Nonnull predicate builders
+        return Try
+                .of(() -> Long.parseLong(input))
+                .toOption();
+    }
+
+    private Try<BooleanExpression> getUsernamePredicate(final @Nonnull String username) {
+
+        return Try
+                .of(() -> QTopicStarter.topicStarter)
+                .map(qTopicStarter -> qTopicStarter.starterUsername.like(username));
+    }
+
+    private Try<BooleanExpression> getUserIdPredicate(final @Nonnull Long userId) {
+
+        return Try
+                .of(() -> QTopicStarter.topicStarter)
+                .map(qTopicStarter -> qTopicStarter.starterId.eq(userId));
+    }
+
+    private Try<BooleanExpression> getBoardTitlePredicate(final @Nonnull String boardTitle) {
+
+        return Try
+                .of(() -> QTopicStarter.topicStarter)
+                .map(qTopicStarter -> qTopicStarter.boardTitle.like(boardTitle));
+    }
+
+    private Try<BooleanExpression> getBoardIdPredicate(final @Nonnull Long boardId) {
+
+        return Try
+                .of(() -> QTopicStarter.topicStarter)
+                .map(qTopicStarter -> qTopicStarter.boardId.eq(boardId));
+    }
+
+    private Try<BooleanExpression> getTopicSubjectPredicate(final @Nonnull String topicSubject) {
+
+        return Try
+                .of(() -> QTopicStarter.topicStarter)
+                .map(qTopicStarter -> qTopicStarter.topicSubject.like(topicSubject));
+    }
+
+    private Try<BooleanExpression> getTopicIdPredicate(final @Nonnull Long topicId) {
+
+        return Try
+                .of(() -> QTopicStarter.topicStarter)
+                .map(qTopicStarter -> qTopicStarter.topicId.eq(topicId));
+    }
+}
diff --git a/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/TopicStarterParserService.java b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/TopicStarterParserService.java
new file mode 100644
index 0000000..8cd4ab7
--- /dev/null
+++
b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/TopicStarterParserService.java
@@ -0,0 +1,8 @@
+package gr.thmmy.mthmmy.topicstarter.service.topic.starter.parser;
+
+import io.vavr.control.Try;
+
+/**
+ * Crawls the forum and stores the starter of every topic found.
+ */
+public interface TopicStarterParserService {
+
+    /**
+     * Runs one full crawl.
+     *
+     * @return success when the crawl of the forum index completed, otherwise the failure
+     */
+    Try<Void> parseTopicStarters();
+}
diff --git a/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/TopicStarterParserServiceImpl.java b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/TopicStarterParserServiceImpl.java
new file mode 100644
index 0000000..a2e42ad
--- /dev/null
+++ b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/TopicStarterParserServiceImpl.java
@@ -0,0 +1,241 @@
+package gr.thmmy.mthmmy.topicstarter.service.topic.starter.parser;
+
+import gr.thmmy.mthmmy.topicstarter.entity.topic.QTopicStarter;
+import gr.thmmy.mthmmy.topicstarter.entity.topic.TopicStarter;
+import gr.thmmy.mthmmy.topicstarter.repository.topic.TopicStarterRepository;
+import gr.thmmy.mthmmy.topicstarter.service.topic.starter.parser.util.TopicParserUtils;
+import io.vavr.Tuple;
+import io.vavr.collection.List;
+import io.vavr.control.Option;
+import io.vavr.control.Try;
+import lombok.Data;
+import lombok.extern.slf4j.Slf4j;
+import okhttp3.*;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.core.env.Environment;
+import org.springframework.stereotype.Service;
+
+import java.util.stream.IntStream;
+
+import static gr.thmmy.mthmmy.topicstarter.service.topic.starter.parser.util.BoardParserUtils.extractBoardIdFromUrl;
+import static gr.thmmy.mthmmy.topicstarter.service.topic.starter.parser.util.BoardParserUtils.parseSubBoards;
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Logs in to the forum, walks the board tree starting from the forum index and saves
+ * (or refreshes) one {@link TopicStarter} row per topic found.
+ */
+@Service
+@Data
+@Slf4j
+public class TopicStarterParserServiceImpl implements TopicStarterParserService {
+    /* Constants */
+    private static final String BASE_URL = "https://www.thmmy.gr/smf/index.php?action=forum";
+    private static final String RECYCLING_BIN_BOARD_ID = "244.0";
+    private static final String USERNAME_ENV_VAR = "TOPIC_STARTERS_USERNAME";
+    private static final String PASSWORD_ENV_VAR = "TOPIC_STARTERS_PASSWORD";
+    private static final HttpUrl LOGIN_URL = HttpUrl.parse("https://www.thmmy.gr/smf/index.php?action=login2");
+
+    private final OkHttpClient client;
+    private final TopicStarterRepository topicStarterRepository;
+
+    @Autowired
+    private Environment environment;
+
+    /**
+     * Logs in and then crawls every board reachable from the forum index.
+     *
+     * @return success when the login and the crawl of the index completed
+     * @throws RuntimeException if the username, the password or the login URL is missing
+     */
+    @Override
+    public Try<Void> parseTopicStarters() {
+
+        return login()
+                .flatMap(ignored -> parseBoard(BASE_URL));
+    }
+
+    /**
+     * Crawls one board: first every sub board (depth first, skipping the recycling bin),
+     * then the board's own topics.
+     */
+    private Try<Void> parseBoard(final String url) {
+        requireNonNull(url, "url is null");
+
+        return fetchDocument(url)
+                .flatMap(document -> parseSubBoards(document) // Parses the sub boards
+                        .flatMap(subBoards -> Option
+                                .of(subBoards)
+                                .toTry()
+                                .flatMap(subBoardsNotNull -> Try
+                                        .run(() -> subBoardsNotNull
+                                                .stream()
+                                                .map(subBoard -> subBoard.attr("href"))
+                                                .filter(subBoardUrl -> !subBoardUrl
+                                                        .contains("board=" + RECYCLING_BIN_BOARD_ID))
+                                                // Best effort: a failing sub board does not abort its
+                                                // siblings, but the failure is no longer dropped silently
+                                                .forEach(subBoardUrl -> parseBoard(subBoardUrl)
+                                                        .onFailure(throwable -> log.warn(
+                                                                "Failed to parse board {}", subBoardUrl, throwable)))
+                                        )
+                                )
+                        )
+                        // Parses the topics
+                        .flatMap(ignored -> parseTopics(document, url))
+                );
+    }
+
+    /**
+     * Saves the topics of every page of a board. {@code document} is the already fetched
+     * first page; the remaining pages are fetched here.
+     */
+    private Try<Void> parseTopics(final Document document,
+                                  final String boardUrl) {
+        requireNonNull(document, "document is null");
+        requireNonNull(boardUrl, "boardUrl is null");
+
+        return Try.of(() -> document // Finds the number of pages in this board
+                .select("a.navPages")
+                .last())
+                .map(lastPageLink -> Option
+                        .of(lastPageLink) // no page links at all means a single page
+                        .map(Element::text)
+                        .map(Integer::parseInt)
+                        .getOrElse(1))
+                .flatMap(numberOfPages -> Try // Parses the board title
+                        .of(() -> document
+                                .select("div.nav>b>a")
+                                .last()
+                                .text())
+                        .flatMap(boardTitle -> extractBoardIdFromUrl(boardUrl) // Parses topics of the current page
+                                .flatMap(boardId -> saveTopics(document, boardTitle, boardUrl, boardId)
+                                        .flatMap(ignored -> IntStream // Parses the topics from the rest of the pages
+                                                .range(1, numberOfPages)
+                                                .boxed()
+                                                .map(page -> Try // Builds the URL of the board for each page
+                                                        .of(() -> String.join(".",
+                                                                boardUrl.substring(0, boardUrl.lastIndexOf(".")),
+                                                                String.valueOf(page * 20)))
+                                                        .flatMap(this::fetchDocument)
+                                                        .flatMap(pageDocument -> saveTopics(
+                                                                pageDocument,
+                                                                boardTitle,
+                                                                boardUrl,
+                                                                boardId)))
+                                                .collect(List.collector())
+                                                .transform(Try::sequence)
+                                        )
+                                )
+                        ).<Void>map(ignored -> null));
+    }
+
+    /**
+     * Parses every topic row of one board page and inserts it, or updates the row already
+     * stored under the same topic ID.
+     */
+    private Try<Void> saveTopics(final Document document,
+                                 final String boardTitle,
+                                 final String boardUrl,
+                                 final Long boardId) {
+        requireNonNull(document, "document is null");
+        requireNonNull(boardTitle, "boardTitle is null");
+        requireNonNull(boardUrl, "boardUrl is null");
+        requireNonNull(boardId, "boardId is null");
+
+        return Try // Finds this page's topics
+                .of(() -> document
+                        .select("table.bordercolor tbody>tr:not([class])"))
+                .flatMap(topics -> Try
+                        .run(() -> topics
+                                .forEach(topicRow -> TopicParserUtils
+                                        .parseTopic(topicRow, boardTitle, boardUrl, boardId)
+                                        .map(topicStarter -> savedTopicStarter(topicStarter)
+                                                .flatMap(savedTopicStarter -> updateSavedTopicStarter(savedTopicStarter, topicStarter))
+                                                .getOrElse(topicStarter)
+                                        )
+                                        .map(topicStarterRepository::save)
+                                        // Rows are still skipped on failure, but the reason is now
+                                        // traceable; debug level because the selector may also match
+                                        // rows that are not topics
+                                        .onFailure(throwable -> log.debug(
+                                                "Skipped a row of board {}", boardUrl, throwable))
+                                )
+                        )
+                );
+    }
+
+    /**
+     * Looks up the stored row that has the same topic ID as the given one.
+     */
+    private Option<TopicStarter> savedTopicStarter(final TopicStarter topicStarter) {
+        requireNonNull(topicStarter, "topicStarter is null");
+
+        return Option
+                .of(QTopicStarter.topicStarter)
+                .map(qTopicStarter -> qTopicStarter.topicId.eq(topicStarter.getTopicId()))
+                .map(topicStarterRepository::findOne)
+                .flatMap(Option::ofOptional);
+    }
+
+    /**
+     * Copies every parsed field of {@code newTopicStarter} onto the stored entity.
+     *
+     * @return the updated stored entity
+     */
+    private Option<TopicStarter> updateSavedTopicStarter(final TopicStarter savedTopicStarter, final TopicStarter newTopicStarter) {
+        requireNonNull(savedTopicStarter, "savedTopicStarter is null");
+        requireNonNull(newTopicStarter, "newTopicStarter is null");
+
+        savedTopicStarter.setTopicId(newTopicStarter.getTopicId());
+        savedTopicStarter.setTopicUrl(newTopicStarter.getTopicUrl());
+        savedTopicStarter.setStarterUsername(newTopicStarter.getStarterUsername());
+        savedTopicStarter.setStarterUrl(newTopicStarter.getStarterUrl());
+        savedTopicStarter.setStarterId(newTopicStarter.getStarterId());
+        savedTopicStarter.setBoardTitle(newTopicStarter.getBoardTitle());
+        savedTopicStarter.setBoardUrl(newTopicStarter.getBoardUrl());
+        savedTopicStarter.setBoardId(newTopicStarter.getBoardId());
+        savedTopicStarter.setTopicSubject(newTopicStarter.getTopicSubject());
+        savedTopicStarter.setNumberOfReplies(newTopicStarter.getNumberOfReplies());
+        savedTopicStarter.setNumberOfViews(newTopicStarter.getNumberOfViews());
+
+        return Option.of(savedTopicStarter);
+    }
+
+    /**
+     * Posts the credentials read from the environment to the login URL. The cookies set by
+     * the response are kept by the client's cookie jar.
+     * NOTE(review): only transport errors are detected here; the response is not inspected,
+     * so rejected credentials are not reported - confirm whether that is intended.
+     *
+     * @throws RuntimeException if the username, the password or the login URL is missing
+     */
+    private Try<Void> login() {
+        // Same exceptions and messages as before, checked in the same order
+        final String username = environment.getProperty(USERNAME_ENV_VAR);
+        if (username == null) {
+            throw new RuntimeException("Username is null");
+        }
+        final String password = environment.getProperty(PASSWORD_ENV_VAR);
+        if (password == null) {
+            throw new RuntimeException("Password is null");
+        }
+        if (LOGIN_URL == null) { // HttpUrl.parse yields null for a malformed URL
+            throw new RuntimeException("Login URL is null.");
+        }
+
+        return Try
+                .of(() -> new FormBody.Builder()
+                        .add("user", username)
+                        .add("passwrd", password)
+                        .add("cookielength", "-1") // -1 is forever
+                        .build())
+                .map(formBody -> new Request.Builder()
+                        .url(LOGIN_URL)
+                        .post(formBody)
+                        .build())
+                .mapTry(request -> client
+                        .newCall(request)
+                        .execute())
+                .flatMap(response -> Try
+                        .run(() -> response
+                                .body()
+                                .close()));
+    }
+
+    /**
+     * Executes a GET request for the URL and parses the response body as HTML.
+     */
+    private Try<Document> fetchDocument(final String url) {
+
+        return Try
+                .of(() -> new Request.Builder()
+                        .url(url)
+                        .build())
+                .mapTry(request -> client
+                        .newCall(request)
+                        .execute())
+                .flatMap(this::getResponseString)
+                .map(Jsoup::parse);
+    }
+
+    private Try<String> getResponseString(final Response response) {
+        requireNonNull(response, "response is null");
+
+        // Checks response for null and closes response body
+        return Try
+                .of(response::body)
+                .flatMap(responseBody -> Option
+                        .of(responseBody)
+                        .toTry()
+                        .mapTry(ResponseBody::string)
+                        .flatMap(responseBodyString -> Try
+                                .run(responseBody::close)
+                                .map(ignored -> responseBodyString))
+                );
+    }
+}
diff --git a/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/util/BoardParserUtils.java b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/util/BoardParserUtils.java
new file mode 100644
index 0000000..12706ae
--- /dev/null
+++ b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/util/BoardParserUtils.java
@@ -0,0 +1,33 @@
+package gr.thmmy.mthmmy.topicstarter.service.topic.starter.parser.util;
+
+import io.vavr.control.Try;
+import org.jsoup.nodes.Document;
+import org.jsoup.select.Elements;
+
+import java.util.regex.Pattern;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Static helpers for parsing board pages.
+ */
+public abstract class BoardParserUtils {
+
+    // Compiled once instead of on every call
+    private static final Pattern BOARD_ID_PATTERN = Pattern.compile(".+?board=([0-9]+)", Pattern.MULTILINE);
+
+    /**
+     * Extracts the numeric board ID that follows {@code board=} in the URL.
+     *
+     * @param url the board URL, must not be {@code null}
+     * @return the ID, or {@code -1} when the URL holds no parsable board ID
+     */
+    public static Try<Long> extractBoardIdFromUrl(final String url) {
+        requireNonNull(url, "url is null");
+
+        return Try.of(() -> BOARD_ID_PATTERN.matcher(url))
+                .map(matcher -> Try
+                        .of(matcher::find)
+                        .filter(found -> found)
+                        .map(ignored -> Long.parseLong(matcher.group(1)))
+                        .getOrElse(-1L)
+                );
+    }
+
+    /**
+     * Selects the sub board links of a board page.
+     *
+     * @param document the parsed board page, must not be {@code null}
+     * @return the matching anchor elements, possibly empty
+     */
+    public static Try<Elements> parseSubBoards(final Document document) {
+        requireNonNull(document, "document is null");
+
+        return Try.of(() -> document
+                .select("div.tborder tbody tr.windowbg2 td>b>a[name^=b]"));
+    }
+}
diff --git a/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/util/TopicParserUtils.java
b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/util/TopicParserUtils.java
new file mode 100644
index 0000000..238e6f3
--- /dev/null
+++ b/service/src/main/java/gr/thmmy/mthmmy/topicstarter/service/topic/starter/parser/util/TopicParserUtils.java
@@ -0,0 +1,177 @@
+package gr.thmmy.mthmmy.topicstarter.service.topic.starter.parser.util;
+
+import gr.thmmy.mthmmy.topicstarter.entity.topic.TopicStarter;
+import io.vavr.control.Option;
+import io.vavr.control.Try;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import javax.annotation.Nullable;
+import java.util.regex.Pattern;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * Static helpers that turn one topic row of a board page into a {@link TopicStarter}.
+ */
+public abstract class TopicParserUtils {
+
+    // Compiled once instead of on every call
+    private static final Pattern TOPIC_ID_PATTERN = Pattern.compile(".+?topic=([0-9]+)", Pattern.MULTILINE);
+    private static final Pattern STARTER_ID_PATTERN = Pattern.compile(".+?profile;u=([0-9]+)", Pattern.MULTILINE);
+
+    /**
+     * Parses one topic row.
+     * <p>
+     * Columns are read by position: 2 holds the subject link, 3 the starter, 4 the number
+     * of replies and 5 the number of views. When the starter cell holds no link the topic
+     * is still returned, with a {@code null} starter URL and a starter ID of {@code -1}.
+     *
+     * @param topicRow   the table row of the topic
+     * @param boardTitle title of the board the row belongs to
+     * @param boardUrl   URL of that board
+     * @param boardId    ID of that board
+     * @return the populated entity, or a failure when a mandatory cell is missing or malformed
+     * @throws NullPointerException if any argument is {@code null}
+     */
+    public static Try<TopicStarter> parseTopic(final Element topicRow,
+                                               final String boardTitle,
+                                               final String boardUrl,
+                                               final Long boardId) {
+        requireNonNull(topicRow, "topicRow is null");
+        requireNonNull(boardTitle, "boardTitle is null");
+        requireNonNull(boardUrl, "boardUrl is null");
+        requireNonNull(boardId, "boardId is null");
+
+        return Try.of(() -> topicRow
+                .select("td"))
+                .flatMap(topicColumns -> Try
+                        // first() yields null when the starter cell contains no link
+                        .of(() -> topicColumns
+                                .get(3)
+                                .select("a")
+                                .first())
+                        .flatMap(starterUrlElement -> Try
+                                .of(TopicStarter::new)
+                                .flatMap(topic -> parseTopicSubject(topicColumns)
+                                        .map(topicSubject -> {
+                                            topic.setTopicSubject(topicSubject);
+
+                                            return topic;
+                                        })
+                                ).flatMap(topic -> parseTopicUrl(topicColumns)
+                                        .flatMap(topicUrl -> extractTopicIdFromUrl(topicUrl)
+                                                .map(topicId -> {
+                                                    topic.setTopicUrl(topicUrl);
+                                                    topic.setTopicId(topicId);
+
+                                                    return topic;
+                                                })
+                                        )
+                                ).flatMap(topic -> parseTopicStarterUsername(topicColumns)
+                                        .map(starterUsername -> {
+                                            topic.setStarterUsername(starterUsername);
+
+                                            return topic;
+                                        })
+                                ).flatMap(topic -> parseTopicStarterUrl(starterUrlElement)
+                                        .flatMap(topicStarterUrl -> extractTopicStarterIdFromUrl(topicStarterUrl)
+                                                .map(topicStarterId -> {
+                                                    topic.setStarterUrl(topicStarterUrl);
+                                                    topic.setStarterId(topicStarterId);
+
+                                                    return topic;
+                                                })
+                                        )
+                                ).flatMap(topic -> parseTopicNumberOfReplies(topicColumns)
+                                        .map(numReplies -> {
+                                            topic.setNumberOfReplies(numReplies);
+
+                                            return topic;
+                                        })
+                                ).flatMap(topic -> parseTopicNumberOfViews(topicColumns)
+                                        .map(numViews -> {
+                                            topic.setNumberOfViews(numViews);
+
+                                            return topic;
+                                        })
+                                ).map(topic -> {
+                                            topic.setBoardTitle(boardTitle);
+                                            topic.setBoardId(boardId);
+                                            topic.setBoardUrl(boardUrl);
+
+                                            return topic;
+                                        }
+                                )
+                        )
+                );
+    }
+
+    private static Try<String> parseTopicSubject(final Elements topicColumns) {
+        requireNonNull(topicColumns, "topicColumns is null");
+
+        return Try
+                .of(() -> topicColumns
+                        .get(2)
+                        .select("span>a")
+                        .first()
+                        .text());
+    }
+
+    private static Try<String> parseTopicUrl(final Elements topicColumns) {
+        requireNonNull(topicColumns, "topicColumns is null");
+
+        return Try
+                .of(() -> topicColumns
+                        .get(2)
+                        .select("span>a")
+                        .first()
+                        .attr("href"));
+    }
+
+    private static Try<String> parseTopicStarterUsername(final Elements topicColumns) {
+        requireNonNull(topicColumns, "topicColumns is null");
+
+        return Try
+                .of(() -> topicColumns
+                        .get(3)
+                        .text());
+    }
+
+    /**
+     * Reads the profile URL from the starter link.
+     *
+     * @param starterUrlEl the starter link, {@code null} when the starter cell has none
+     * @return the {@code href} value, or a successful {@code null} when there is no link
+     */
+    private static Try<String> parseTopicStarterUrl(final @Nullable Element starterUrlEl) {
+
+        // No requireNonNull here: a missing link is a supported case, and rejecting it made
+        // the whole topic fail to parse
+        return Try.of(() -> Option
+                .of(starterUrlEl)
+                .map(starterUrlElNotNull -> starterUrlElNotNull
+                        .attr("href"))
+                .getOrElse(() -> null));
+    }
+
+    private static Try<Long> parseTopicNumberOfReplies(final Elements topicColumns) {
+        requireNonNull(topicColumns, "topicColumns is null");
+
+        return Try
+                .of(() -> topicColumns
+                        .get(4)
+                        .text())
+                .map(Long::parseLong);
+    }
+
+    private static Try<Long> parseTopicNumberOfViews(final Elements topicColumns) {
+        requireNonNull(topicColumns, "topicColumns is null");
+
+        return Try
+                .of(() -> topicColumns
+                        .get(5)
+                        .text())
+                .map(Long::parseLong);
+    }
+
+    /**
+     * Extracts the numeric ID that follows {@code topic=} in the URL, or {@code -1} when
+     * the URL holds no parsable topic ID.
+     */
+    private static Try<Long> extractTopicIdFromUrl(final String topicUrl) {
+        requireNonNull(topicUrl, "topicUrl is null");
+
+        return Try.of(() -> TOPIC_ID_PATTERN.matcher(topicUrl))
+                .map(matcher -> Try.of(matcher::find)
+                        .filter(found -> found)
+                        .map(ignored -> Long.parseLong(matcher.group(1)))
+                        .getOrElse(-1L)
+                );
+
+    }
+
+    /**
+     * Extracts the numeric ID that follows {@code profile;u=} in the URL, or {@code -1}
+     * when the URL is {@code null} or holds no parsable user ID.
+     */
+    private static Try<Long> extractTopicStarterIdFromUrl(final @Nullable String topicStarterUrl) {
+
+        return Option
+                .of(topicStarterUrl)
+                .map(topicStarterUrlNotNull -> Try
+                        .of(() -> STARTER_ID_PATTERN.matcher(topicStarterUrlNotNull))
+                        .map(matcher -> Try.of(matcher::find)
+                                .filter(found -> found)
+                                .map(ignored -> Long.parseLong(matcher.group(1)))
+                                .getOrElse(-1L))
+                ).getOrElse(Try.success(-1L));
+    }
+}