Compare commits
10 commits
92468c8ad8
...
378210b5eb
| Author | SHA1 | Date | |
|---|---|---|---|
| 378210b5eb | |||
| a56522695c | |||
| a85f6878ae | |||
| 580f80b521 | |||
| 6f85efcd26 | |||
| 23b263a50b | |||
| 379df61598 | |||
| 2e9b73a768 | |||
| c9a1c59eb4 | |||
| f6af36e999 |
18 changed files with 3712 additions and 221 deletions
3
.env.example
Normal file
3
.env.example
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
POSTGRES_DB=root
|
||||
POSTGRES_USER=root
|
||||
POSTGRES_PASSWORD=root
|
||||
27
.gitignore
vendored
27
.gitignore
vendored
|
|
@ -1,3 +1,24 @@
|
|||
/target
|
||||
flake.lock
|
||||
.direnv/
|
||||
*.env
|
||||
|
||||
target
|
||||
**/*.rs.bk
|
||||
.idea
|
||||
*.iml
|
||||
/result*
|
||||
*.log
|
||||
*~
|
||||
|
||||
# cachix tmp file
|
||||
store-path-pre-build
|
||||
|
||||
# Devenv
|
||||
.devenv*
|
||||
devenv.local.nix
|
||||
|
||||
# direnv
|
||||
.direnv
|
||||
|
||||
# pre-commit
|
||||
.pre-commit-config.yaml
|
||||
|
||||
template/flake.lock
|
||||
|
|
|
|||
3082
Cargo.lock
generated
Executable file
3082
Cargo.lock
generated
Executable file
File diff suppressed because it is too large
Load diff
15
Cargo.toml
15
Cargo.toml
|
|
@ -1,8 +1,17 @@
|
|||
[package]
|
||||
name = "lwn-sub-snoozer"
|
||||
version = "0.1.0"
|
||||
name = "rssify"
|
||||
version = "0.1.1"
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
name = "rssify"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "rssify-cli"
|
||||
path = "src/bin/cli.rs"
|
||||
# required-features = ["cli"]
|
||||
|
||||
[dependencies]
|
||||
tokio = { version = "1.46.0", features = ["full"] }
|
||||
futures = "0.3.31"
|
||||
|
|
@ -13,3 +22,5 @@ chrono = "0.4.41"
|
|||
rss = "2.0.12"
|
||||
mini-redis = "0.4.1"
|
||||
tokio-postgres = "0.7.13"
|
||||
clap = { version = "4.5.42", features = ["derive"] }
|
||||
json = "0.12.4"
|
||||
|
|
|
|||
7
Dockerfile
Normal file
7
Dockerfile
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
FROM rust:1.88
|
||||
|
||||
WORKDIR /src
|
||||
COPY . .
|
||||
|
||||
RUN cargo build --release
|
||||
CMD ["./target/release/rssify-cli", "/rss/"]
|
||||
28
README.md
Normal file
28
README.md
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
# rssify - Transform some websites I like into RSS feeds
|
||||
|
||||
- lwn paid articles
|
||||
- CISA KEV release
|
||||
|
||||
## Usage
|
||||
|
||||
### Using nix
|
||||
|
||||
```bash
|
||||
nix develop # to get the dev dependencies
|
||||
nix build
|
||||
nix run -- /tmp/rss/
|
||||
```
|
||||
|
||||
### Using podman
|
||||
|
||||
Use the `justfile` to run commands:
|
||||
|
||||
```bash
|
||||
just init # Will init the database, and build the app image
|
||||
just run # Will run the rssify app to update the database and the RSS file
|
||||
```
|
||||
|
||||
## TODO
|
||||
|
||||
- [ ] Add volume to the db to store it if it crashes
|
||||
- [ ] Add tests
|
||||
16
default.nix
Normal file
16
default.nix
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
{pkgs ? import <nixpkgs> {}}: let
|
||||
manifest = (pkgs.lib.importTOML ./Cargo.toml).package;
|
||||
in
|
||||
pkgs.rustPlatform.buildRustPackage {
|
||||
pname = manifest.name;
|
||||
version = manifest.version;
|
||||
cargoLock.lockFile = ./Cargo.lock;
|
||||
src = pkgs.lib.cleanSource ./.;
|
||||
|
||||
nativeBuildInputs = with pkgs; [
|
||||
pkg-config
|
||||
];
|
||||
buildInputs = with pkgs; [
|
||||
openssl
|
||||
];
|
||||
}
|
||||
27
flake.lock
generated
Normal file
27
flake.lock
generated
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
{
|
||||
"nodes": {
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1756217674,
|
||||
"narHash": "sha256-TH1SfSP523QI7kcPiNtMAEuwZR3Jdz0MCDXPs7TS8uo=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "4e7667a90c167f7a81d906e5a75cba4ad8bee620",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"owner": "NixOS",
|
||||
"ref": "nixos-25.05",
|
||||
"repo": "nixpkgs",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
"inputs": {
|
||||
"nixpkgs": "nixpkgs"
|
||||
}
|
||||
}
|
||||
},
|
||||
"root": "root",
|
||||
"version": 7
|
||||
}
|
||||
37
flake.nix
37
flake.nix
|
|
@ -1,39 +1,22 @@
|
|||
{
|
||||
description = "lwn-sub-snoozer";
|
||||
inputs = {
|
||||
flake-utils.url = "github:numtide/flake-utils";
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
|
||||
fenix = {
|
||||
url = "github:nix-community/fenix/monthly";
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
};
|
||||
};
|
||||
|
||||
outputs = {
|
||||
self,
|
||||
nixpkgs,
|
||||
flake-utils,
|
||||
fenix,
|
||||
}:
|
||||
flake-utils.lib.eachDefaultSystem (system: let
|
||||
pkgs = nixpkgs.legacyPackages.${system};
|
||||
}: let
|
||||
supportedSystems = ["x86_64-linux"];
|
||||
forAllSystems = nixpkgs.lib.genAttrs supportedSystems;
|
||||
pkgsFor = nixpkgs.legacyPackages;
|
||||
in {
|
||||
packages = {
|
||||
default = self.packages.${system}.myapp;
|
||||
};
|
||||
# $ nix develop
|
||||
devShells.default = pkgs.mkShell {
|
||||
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [pkgs.openssl];
|
||||
packages = [
|
||||
pkgs.pkg-config
|
||||
pkgs.openssl
|
||||
pkgs.nixpkgs-fmt
|
||||
pkgs.nil
|
||||
pkgs.postgresql
|
||||
|
||||
# Rust
|
||||
fenix.packages.${system}.default.toolchain
|
||||
];
|
||||
};
|
||||
packages = forAllSystems (system: {
|
||||
default = pkgsFor.${system}.callPackage ./. {};
|
||||
});
|
||||
devShells = forAllSystems (system: {
|
||||
default = pkgsFor.${system}.callPackage ./shell.nix {};
|
||||
});
|
||||
};
|
||||
}
|
||||
|
|
|
|||
42
justfile
42
justfile
|
|
@ -1,7 +1,41 @@
|
|||
set dotenv-load
|
||||
|
||||
POSTGRES_HOST := 'rssify_db'
|
||||
TEMP_DIR := `mktemp -d`
|
||||
|
||||
clean:
|
||||
rm -rf target
|
||||
podman network rm lwn-sub-snoozer || true
|
||||
podman stop rssify_db || true
|
||||
podman network rm rssify_network || true
|
||||
|
||||
db:
|
||||
podman network create lwn-sub-snoozer || true
|
||||
podman run --name postgres --rm -p 5432:5432 --network=lwn-sub-snoozer -e POSTGRES_DB=dev -e POSTGRES_USER=root -e POSTGRES_PASSWORD=root docker.io/postgres:alpine
|
||||
init_db:
|
||||
podman network create rssify_network || true
|
||||
podman run --rm -d --replace \
|
||||
--name {{POSTGRES_HOST}} \
|
||||
--network=rssify_network \
|
||||
-p 5432:5432 \
|
||||
--env-file .env \
|
||||
docker.io/postgres:alpine
|
||||
|
||||
build:
|
||||
podman build --tag rssify_app:latest .
|
||||
|
||||
init: init_db build
|
||||
|
||||
attach:
|
||||
podman run -it --rm \
|
||||
--name rssify_app \
|
||||
--network rssify_network \
|
||||
-e POSTGRES_HOST={{POSTGRES_HOST}} \
|
||||
--env-file .env \
|
||||
--volume {{TEMP_DIR}}:/rss \
|
||||
rssify_app:latest sh
|
||||
|
||||
run:
|
||||
podman run --rm \
|
||||
--name rssify_app \
|
||||
--network rssify_network \
|
||||
-e POSTGRES_HOST={{POSTGRES_HOST}} \
|
||||
--env-file .env \
|
||||
--volume {{TEMP_DIR}}:/rss \
|
||||
rssify_app:latest
|
||||
|
|
|
|||
11
shell.nix
Normal file
11
shell.nix
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
{pkgs ? import <nixpkgs> {}}:
|
||||
pkgs.mkShell {
|
||||
inputsFrom = [(pkgs.callPackage ./default.nix {})];
|
||||
buildInputs = with pkgs; [
|
||||
rust-analyzer
|
||||
alejandra
|
||||
clippy
|
||||
nixd
|
||||
nil
|
||||
];
|
||||
}
|
||||
61
src/bin/cli.rs
Executable file
61
src/bin/cli.rs
Executable file
|
|
@ -0,0 +1,61 @@
|
|||
use clap::Parser;
|
||||
use rssify::{kev::KEV, lwn::LWN, source::Source};
|
||||
use std::env;
|
||||
use tokio::runtime::Runtime;
|
||||
use tokio_postgres;
|
||||
|
||||
#[derive(Parser)]
|
||||
struct Cli {
|
||||
path: std::path::PathBuf,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let rt = Runtime::new().unwrap();
|
||||
|
||||
let args = Cli::parse();
|
||||
let db_connection_string = match env::var("DATABASE_CONNECTION") {
|
||||
Ok(s) => s,
|
||||
_ => format!(
|
||||
"host={} dbname={} user={} password={}",
|
||||
env::var("POSTGRES_HOST").unwrap_or(String::from("localhost")),
|
||||
env::var("POSTGRES_USER").unwrap_or(String::from("root")),
|
||||
env::var("POSTGRES_USER").unwrap_or(String::from("root")),
|
||||
env::var("POSTGRES_PASSWORD").unwrap_or(String::from("root"))
|
||||
),
|
||||
};
|
||||
|
||||
println!("Connection string: {}", db_connection_string);
|
||||
|
||||
rt.block_on(async {
|
||||
// Connect to the database.
|
||||
if let Ok((client, connection)) =
|
||||
tokio_postgres::connect(&db_connection_string, tokio_postgres::NoTls).await
|
||||
{
|
||||
println!("Working...");
|
||||
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
eprintln!("[x] Connection db error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
println!("Connected");
|
||||
|
||||
if let Ok(_) = KEV::fetch(&client).await {
|
||||
println!("KEV fetched successfully");
|
||||
if let Ok(_) = KEV::publish(&client, args.path.clone()).await {
|
||||
println!("KEV updated successfully");
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(_) = LWN::fetch(&client).await {
|
||||
println!("LWN fetched successfully");
|
||||
if let Ok(_) = LWN::publish(&client, args.path.clone()).await {
|
||||
println!("LWN updated successfully");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("[x] Could not connect with db");
|
||||
}
|
||||
});
|
||||
}
|
||||
80
src/error.rs
Normal file
80
src/error.rs
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
use std::str::Utf8Error;
|
||||
|
||||
/// Discriminates the application-specific failure modes.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum RssifyErrorKind {
    InvalidKevCatalogue,
    SaveXMLError,
    InvalidNaiveDate,
    Unknown,
}

/// An application-level error: a machine-readable kind plus a human-readable
/// message describing the context in which the failure occurred.
#[derive(Debug)]
pub struct RssifyError {
    /// Kind of error
    kind: RssifyErrorKind,
    /// Associated message of the context
    pub message: String,
}

impl RssifyError {
    /// Builds an error of the given `kind` carrying `message`.
    pub fn new(kind: RssifyErrorKind, message: &str) -> Self {
        Self {
            kind,
            message: message.to_owned(),
        }
    }

    /// Returns the machine-readable kind of this error.
    pub fn kind(&self) -> RssifyErrorKind {
        self.kind
    }
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// Rssify error
|
||||
RssifyError(RssifyError),
|
||||
Utf8Error(Utf8Error),
|
||||
ReqwestError(reqwest::Error),
|
||||
RssError(rss::Error),
|
||||
}
|
||||
|
||||
impl Error {
|
||||
pub fn new(kind: RssifyErrorKind, message: &str) -> Self {
|
||||
Error::RssifyError(RssifyError::new(kind, message))
|
||||
}
|
||||
|
||||
pub fn invalid_kev_catalogue() -> Self {
|
||||
Error::RssifyError(RssifyError::new(
|
||||
RssifyErrorKind::InvalidKevCatalogue,
|
||||
"[KEV] Invalid KEV catalogue: failed to parse the JSON entry",
|
||||
))
|
||||
}
|
||||
|
||||
pub fn save_xml_error(path: &str) -> Self {
|
||||
Error::RssifyError(RssifyError::new(
|
||||
RssifyErrorKind::SaveXMLError,
|
||||
&format!("Failed to safe XML feed to {}", path),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn invalid_naive_date(date: &str) -> Self {
|
||||
Error::RssifyError(RssifyError::new(
|
||||
RssifyErrorKind::InvalidNaiveDate,
|
||||
&format!("Failed to parse NaiveDate {}", date),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<reqwest::Error> for Error {
|
||||
fn from(e: reqwest::Error) -> Self {
|
||||
Self::ReqwestError(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<rss::Error> for Error {
|
||||
fn from(e: rss::Error) -> Self {
|
||||
Self::RssError(e)
|
||||
}
|
||||
}
|
||||
|
||||
pub type RssifyResult<T> = Result<T, Error>;
|
||||
127
src/kev.rs
Executable file
127
src/kev.rs
Executable file
|
|
@ -0,0 +1,127 @@
|
|||
use reqwest::get;
|
||||
use rss::{Guid, Item};
|
||||
use std::path::PathBuf;
|
||||
use tokio_postgres;
|
||||
|
||||
use crate::{
|
||||
error::{Error, RssifyResult},
|
||||
source::{self, Source},
|
||||
};
|
||||
|
||||
pub struct KEV;
|
||||
const URL: &str =
|
||||
"https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json";
|
||||
|
||||
impl Source for KEV {
|
||||
async fn fetch(client: &tokio_postgres::Client) -> RssifyResult<()> {
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"CREATE TABLE IF NOT EXISTS kev (
|
||||
title TEXT,
|
||||
cveID TEXT,
|
||||
description TEXT,
|
||||
dateAdded TEXT
|
||||
)",
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("[x] KEV table creation error: {}", e);
|
||||
}
|
||||
|
||||
let text = get(URL).await?.text().await?;
|
||||
|
||||
if let Ok(json) = json::parse(&text) {
|
||||
if let Ok(last_db_entry) = client
|
||||
.query(
|
||||
"SELECT dateAdded, cveID FROM kev ORDER BY dateAdded desc LIMIT 1",
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
{
|
||||
let (last_db_cve_id, last_db_date_added): (&str, &str) = match last_db_entry.first()
|
||||
{
|
||||
Some(row) => (
|
||||
row.try_get("cveID")
|
||||
.map_err(|_| Error::invalid_kev_catalogue())?,
|
||||
row.try_get("dateAdded")
|
||||
.map_err(|_| Error::invalid_kev_catalogue())?,
|
||||
),
|
||||
_ => ("", ""),
|
||||
};
|
||||
|
||||
println!(
|
||||
"[DEBUG] Last db entry: {:?} - {:?}",
|
||||
last_db_cve_id, last_db_date_added
|
||||
);
|
||||
|
||||
let new_entries = json["vulnerabilities"]
|
||||
.members()
|
||||
.take_while(|entry| entry["cveID"] != last_db_cve_id);
|
||||
|
||||
for entry in new_entries {
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"INSERT INTO kev (
|
||||
cveID,
|
||||
title,
|
||||
dateAdded,
|
||||
description
|
||||
) VALUES (
|
||||
$1, $2, $3, $4)",
|
||||
&[
|
||||
&entry["cveID"].as_str(),
|
||||
&format!("{} - {}", entry["cveID"], entry["vulnerabilityName"]),
|
||||
&entry["dateAdded"].as_str(),
|
||||
&format!(
|
||||
"Description: {}\nRequired actions: {}\nNotes: {}",
|
||||
entry["shortDescription"],
|
||||
entry["requiredAction"],
|
||||
entry["notes"]
|
||||
),
|
||||
],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("[x] Error insert: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn publish(client: &tokio_postgres::Client, mut path: PathBuf) -> RssifyResult<()> {
|
||||
let mut items: Vec<Item> = Vec::new();
|
||||
if let Ok(entries) = client
|
||||
.query("SELECT * FROM kev ORDER BY dateAdded desc LIMIT 15", &[])
|
||||
.await
|
||||
{
|
||||
for entry in entries {
|
||||
let mut guid = Guid::default();
|
||||
guid.set_value(entry.get::<_, &str>("cveID"));
|
||||
|
||||
items.push(
|
||||
rss::ItemBuilder::default()
|
||||
.title(Some(entry.get("title")))
|
||||
.link(Some(String::from(URL)))
|
||||
.guid(Some(guid))
|
||||
.pub_date(Some(entry.get("dateAdded")))
|
||||
.description(Some(entry.get("description")))
|
||||
.build(),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let channel = rss::ChannelBuilder::default()
|
||||
.title("CISA KEV")
|
||||
.link(URL)
|
||||
.description("CISA Catalog of Known Exploited Vulnerabilities")
|
||||
.items(items)
|
||||
.build();
|
||||
|
||||
path.push("kev.xml");
|
||||
source::save_xml(&channel.to_string(), &path)
|
||||
}
|
||||
}
|
||||
4
src/lib.rs
Normal file
4
src/lib.rs
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
pub mod error;
|
||||
pub mod kev;
|
||||
pub mod lwn;
|
||||
pub mod source;
|
||||
158
src/lwn.rs
Executable file
158
src/lwn.rs
Executable file
|
|
@ -0,0 +1,158 @@
|
|||
use chrono::{NaiveDate, NaiveDateTime, TimeZone, prelude::Local};
|
||||
use regex::Regex;
|
||||
use reqwest::get;
|
||||
use rss::Item;
|
||||
use scraper::{Html, Selector};
|
||||
use std::path::PathBuf;
|
||||
use tokio_postgres;
|
||||
|
||||
use crate::error::{Error, RssifyResult};
|
||||
use crate::source::{self, Source};
|
||||
|
||||
pub struct LWN;
|
||||
|
||||
impl LWN {
|
||||
async fn fetch_release_date(url: &str) -> RssifyResult<Option<NaiveDateTime>> {
|
||||
let response = get(url).await?.text().await?;
|
||||
|
||||
if let Some(article_text) = Html::parse_document(&response)
|
||||
.select(&Selector::parse("div.ArticleText").unwrap())
|
||||
.next()
|
||||
{
|
||||
if let Some(yes) = article_text.select(&Selector::parse("p").unwrap()).last() {
|
||||
let re = Regex::new(
|
||||
r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
|
||||
).unwrap();
|
||||
if let Some(cap) = re.captures(&yes.inner_html()) {
|
||||
if let Some(date) = cap.get(1) {
|
||||
return match NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y") {
|
||||
Ok(date) => Ok(Some(date.and_hms_opt(0, 0, 0).unwrap())),
|
||||
Err(_) => Err(Error::invalid_naive_date(date.as_str())),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
async fn fetch_paid_article_urls() -> RssifyResult<Vec<rss::Item>> {
|
||||
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
|
||||
let channel = rss::Channel::read_from(&response[..])?;
|
||||
|
||||
Ok(channel
|
||||
.items()
|
||||
.iter()
|
||||
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
|
||||
.map(|i| i.clone())
|
||||
.collect::<Vec<rss::Item>>())
|
||||
}
|
||||
}
|
||||
|
||||
impl Source for LWN {
|
||||
async fn fetch(client: &tokio_postgres::Client) -> RssifyResult<()> {
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"CREATE TABLE IF NOT EXISTS lwn (
|
||||
link TEXT,
|
||||
title TEXT,
|
||||
description TEXT,
|
||||
pub_date TEXT,
|
||||
release_date TEXT
|
||||
)",
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("[x] LWN table creation error: {}", e);
|
||||
}
|
||||
|
||||
// Get new [$] articles
|
||||
if let Ok(items) = LWN::fetch_paid_article_urls().await {
|
||||
for item in items {
|
||||
if let Some(link) = item.link() {
|
||||
match client
|
||||
.query_opt("SELECT release_date FROM lwn WHERE link = $1", &[&link])
|
||||
.await
|
||||
{
|
||||
Ok(None) => {
|
||||
if let Ok(Some(date)) = LWN::fetch_release_date(&link).await {
|
||||
if let (Some(title), Some(description), Some(pub_date)) =
|
||||
(item.title(), item.description(), item.pub_date())
|
||||
{
|
||||
println!("Adding new article to db: {}", link);
|
||||
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"INSERT INTO lwn (
|
||||
link,
|
||||
title,
|
||||
description,
|
||||
pub_date,
|
||||
release_date
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5)",
|
||||
&[
|
||||
&link,
|
||||
&title,
|
||||
&description,
|
||||
&pub_date,
|
||||
&date.to_string(),
|
||||
],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("[x] Error insert: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn publish(client: &tokio_postgres::Client, mut path: PathBuf) -> RssifyResult<()> {
|
||||
let mut items: Vec<Item> = Vec::new();
|
||||
|
||||
if let Ok(saved_articles) = client.query("SELECT * FROM lwn", &[]).await {
|
||||
for row in saved_articles {
|
||||
let date: &str = row.get("release_date");
|
||||
if let Ok(date) = NaiveDateTime::parse_from_str(date, "%Y-%m-%d %H:%M:%S") {
|
||||
if Local.from_local_datetime(&date).unwrap() < Local::now() {
|
||||
let link: String = row.get("link");
|
||||
let guid = rss::GuidBuilder::default()
|
||||
.value(link.clone())
|
||||
.permalink(true)
|
||||
.build();
|
||||
|
||||
items.push(
|
||||
rss::ItemBuilder::default()
|
||||
.title(Some(row.get("title")))
|
||||
.link(Some(link))
|
||||
.guid(Some(guid))
|
||||
.pub_date(Some(Local::now().to_rfc2822()))
|
||||
.description(Some(row.get("description")))
|
||||
.build(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let channel = rss::ChannelBuilder::default()
|
||||
.title("[$] lwn.net")
|
||||
.link("https://dawl.fr/lwn.net/rss.xml")
|
||||
.description("RSS flux of lwn.net paid articles that are freely released.")
|
||||
.items(items)
|
||||
.build();
|
||||
|
||||
path.push("lwn.xml");
|
||||
source::save_xml(&channel.to_string(), &path)
|
||||
}
|
||||
}
|
||||
184
src/main.rs
184
src/main.rs
|
|
@ -1,184 +0,0 @@
|
|||
use chrono::{NaiveDate, NaiveDateTime, TimeZone, prelude::Local};
|
||||
use regex::Regex;
|
||||
use reqwest::get;
|
||||
use scraper::{Html, Selector};
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::io::Write;
|
||||
use tokio::{
|
||||
runtime::Runtime,
|
||||
sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel},
|
||||
};
|
||||
use tokio_postgres;
|
||||
|
||||
fn main() {
|
||||
let rt = Runtime::new().unwrap();
|
||||
|
||||
rt.block_on(async {
|
||||
// Connect to the database.
|
||||
if let Ok((client, connection)) = tokio_postgres::connect(
|
||||
"host=localhost dbname=dev user=root password=root",
|
||||
tokio_postgres::NoTls,
|
||||
)
|
||||
.await
|
||||
{
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
eprintln!("connection error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"CREATE TABLE IF NOT EXISTS articles (
|
||||
link TEXT,
|
||||
title TEXT,
|
||||
description TEXT,
|
||||
pub_date TEXT,
|
||||
release_date TEXT
|
||||
)",
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("table creation error: {}", e);
|
||||
}
|
||||
|
||||
// Get new [$] articles
|
||||
if let Ok(items) = fetch_paid_article_urls().await {
|
||||
for item in items {
|
||||
if let Some(link) = item.link() {
|
||||
match client
|
||||
.query_opt(
|
||||
"SELECT release_date FROM articles WHERE link = $1",
|
||||
&[&link],
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(None) => {
|
||||
if let Ok(Some(date)) = fetch_release_date(&link).await {
|
||||
if let (Some(title), Some(description), Some(pub_date)) =
|
||||
(item.title(), item.description(), item.pub_date())
|
||||
{
|
||||
println!("Adding new article to db: {}", link);
|
||||
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"INSERT INTO articles (
|
||||
link,
|
||||
title,
|
||||
description,
|
||||
pub_date,
|
||||
release_date
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5)",
|
||||
&[
|
||||
&link,
|
||||
&title,
|
||||
&description,
|
||||
&pub_date,
|
||||
&date.to_string(),
|
||||
],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("Error insert: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: How to manage the RSS xml file
|
||||
let mut channel = match File::open("rss.xml") {
|
||||
Ok(file) => rss::Channel::read_from(BufReader::new(file)).unwrap(),
|
||||
_ => rss::ChannelBuilder::default()
|
||||
.title("[$] lwn.net")
|
||||
.link("https://dawl.fr/lwn.net/rss.xml")
|
||||
.description("RSS flux of lwn.net paid articles that are freely released.")
|
||||
.items(vec![])
|
||||
.build(),
|
||||
};
|
||||
let mut items = channel.clone().into_items();
|
||||
|
||||
if let Ok(saved_articles) = client.query("SELECT * FROM articles", &[]).await {
|
||||
for row in saved_articles {
|
||||
let date: &str = row.get("release_date");
|
||||
if let Ok(date) = NaiveDateTime::parse_from_str(date, "%Y-%m-%d %H:%M:%S") {
|
||||
if Local.from_local_datetime(&date).unwrap() < Local::now() {
|
||||
let link: String = row.get("title");
|
||||
let guid = rss::GuidBuilder::default()
|
||||
.value(link.clone())
|
||||
.permalink(true)
|
||||
.build();
|
||||
|
||||
items.push(
|
||||
rss::ItemBuilder::default()
|
||||
.title(Some(row.get("title")))
|
||||
.link(Some(link))
|
||||
.guid(Some(guid))
|
||||
.pub_date(Some(Local::now().to_rfc2822()))
|
||||
.description(Some(row.get("description")))
|
||||
.build(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
channel.set_items(items);
|
||||
if let Err(e) = save_xml(&channel.to_string()) {
|
||||
eprintln!("failed to save xml: {}", e);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn save_xml(rss_string: &str) -> std::io::Result<()> {
|
||||
let mut file = File::create("paid_lwn_net_rss.xml")?;
|
||||
file.write_all(rss_string.as_bytes())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn fetch_release_date(url: &str) -> Result<Option<NaiveDateTime>, Box<dyn Error>> {
|
||||
let response = get(url).await?.text().await?;
|
||||
|
||||
if let Some(article_text) = Html::parse_document(&response)
|
||||
.select(&Selector::parse("div.ArticleText")?)
|
||||
.next()
|
||||
{
|
||||
if let Some(yes) = article_text.select(&Selector::parse("p")?).last() {
|
||||
let re = Regex::new(
|
||||
r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
|
||||
)?;
|
||||
if let Some(cap) = re.captures(&yes.inner_html()) {
|
||||
if let Some(date) = cap.get(1) {
|
||||
return Ok(Some(
|
||||
NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?
|
||||
.and_hms_opt(0, 0, 0)
|
||||
.unwrap(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
async fn fetch_paid_article_urls() -> Result<Vec<rss::Item>, Box<dyn Error>> {
|
||||
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
|
||||
let channel = rss::Channel::read_from(&response[..])?;
|
||||
|
||||
Ok(channel
|
||||
.items()
|
||||
.iter()
|
||||
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
|
||||
.map(|i| i.clone())
|
||||
.collect::<Vec<rss::Item>>())
|
||||
}
|
||||
22
src/source.rs
Normal file
22
src/source.rs
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
use crate::error::{Error, RssifyResult};
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub trait Source {
|
||||
fn fetch(client: &tokio_postgres::Client) -> impl Future<Output = RssifyResult<()>>;
|
||||
fn publish(
|
||||
client: &tokio_postgres::Client,
|
||||
path: PathBuf,
|
||||
) -> impl Future<Output = RssifyResult<()>>;
|
||||
}
|
||||
|
||||
pub fn save_xml(rss_string: &str, path: &std::path::PathBuf) -> RssifyResult<()> {
|
||||
if let Ok(mut file) = File::create(path) {
|
||||
if file.write_all(rss_string.as_bytes()).is_ok() {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
Err(Error::save_xml_error(path.to_str().unwrap()))
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue