Add KEV
This commit is contained in:
parent
23b263a50b
commit
6f85efcd26
13 changed files with 3580 additions and 219 deletions
3082
Cargo.lock
generated
Executable file
3082
Cargo.lock
generated
Executable file
File diff suppressed because it is too large
Load diff
12
Cargo.toml
12
Cargo.toml
|
|
@ -1,8 +1,17 @@
|
|||
[package]
|
||||
name = "lwn-sub-snoozer"
|
||||
name = "rssify"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
name = "rssify"
|
||||
path = "src/lib.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "rssify-cli"
|
||||
path = "src/bin/cli.rs"
|
||||
# required-features = ["cli"]
|
||||
|
||||
[dependencies]
|
||||
tokio = { version = "1.46.0", features = ["full"] }
|
||||
futures = "0.3.31"
|
||||
|
|
@ -14,3 +23,4 @@ rss = "2.0.12"
|
|||
mini-redis = "0.4.1"
|
||||
tokio-postgres = "0.7.13"
|
||||
clap = { version = "4.5.42", features = ["derive"] }
|
||||
json = "0.12.4"
|
||||
|
|
|
|||
|
|
@ -4,4 +4,4 @@ WORKDIR /src
|
|||
COPY . .
|
||||
|
||||
RUN cargo build --release
|
||||
CMD ["./target/release/lwn-sub-snoozer", "/rss/lwn-sub.xml"]
|
||||
CMD ["./target/release/rssify-cli", "/rss/"]
|
||||
|
|
|
|||
|
|
@ -1,3 +1,8 @@
|
|||
# rssify - Transform some websites I like into RSS feeds
|
||||
|
||||
- lwn paid articles
|
||||
- CISA KEV release
|
||||
|
||||
## Usage
|
||||
|
||||
Use the `justfile` to run commands:
|
||||
|
|
|
|||
18
flake.nix
18
flake.nix
|
|
@ -3,17 +3,12 @@
|
|||
inputs = {
|
||||
flake-utils.url = "github:numtide/flake-utils";
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
|
||||
fenix = {
|
||||
url = "github:nix-community/fenix/monthly";
|
||||
inputs.nixpkgs.follows = "nixpkgs";
|
||||
};
|
||||
};
|
||||
|
||||
outputs = {
|
||||
self,
|
||||
nixpkgs,
|
||||
flake-utils,
|
||||
fenix,
|
||||
}:
|
||||
flake-utils.lib.eachDefaultSystem (system: let
|
||||
pkgs = nixpkgs.legacyPackages.${system};
|
||||
|
|
@ -27,12 +22,19 @@
|
|||
packages = [
|
||||
pkgs.pkg-config
|
||||
pkgs.openssl
|
||||
pkgs.nixpkgs-fmt
|
||||
pkgs.nil
|
||||
pkgs.postgresql
|
||||
|
||||
# Nix
|
||||
pkgs.nixpkgs-fmt
|
||||
pkgs.nil
|
||||
pkgs.nixd
|
||||
pkgs.alejandra
|
||||
|
||||
# Rust
|
||||
fenix.packages.${system}.default.toolchain
|
||||
pkgs.rustfmt
|
||||
pkgs.rustc
|
||||
pkgs.cargo
|
||||
pkgs.rust-analyzer
|
||||
];
|
||||
};
|
||||
});
|
||||
|
|
|
|||
33
justfile
33
justfile
|
|
@ -1,30 +1,41 @@
|
|||
set dotenv-load
|
||||
|
||||
POSTGRES_HOST := 'lwn-sub-snoozer_db'
|
||||
POSTGRES_HOST := 'rssify_db'
|
||||
TEMP_DIR := `mktemp -d`
|
||||
|
||||
clean:
|
||||
rm -rf target
|
||||
podman network rm lwn-sub-snoozer_network || true
|
||||
podman stop rssify_db || true
|
||||
podman network rm rssify_network || true
|
||||
|
||||
init_db:
|
||||
podman network create lwn-sub-snoozer_network || true
|
||||
podman network create rssify_network || true
|
||||
podman run --rm -d --replace \
|
||||
--name {{POSTGRES_HOST}} \
|
||||
--network=lwn-sub-snoozer_network \
|
||||
--network=rssify_network \
|
||||
-p 5432:5432 \
|
||||
--env-file .env \
|
||||
docker.io/postgres:alpine
|
||||
|
||||
init_app:
|
||||
podman build --tag lwn-sub-snoozer_app:latest .
|
||||
build:
|
||||
podman build --tag rssify_app:latest .
|
||||
|
||||
init: init_db init_app
|
||||
init: init_db build
|
||||
|
||||
attach:
|
||||
podman run -it --rm \
|
||||
--name rssify_app \
|
||||
--network rssify_network \
|
||||
-e POSTGRES_HOST={{POSTGRES_HOST}} \
|
||||
--env-file .env \
|
||||
--volume {{TEMP_DIR}}:/rss \
|
||||
rssify_app:latest sh
|
||||
|
||||
run:
|
||||
podman run --rm \
|
||||
--name lwn-sub-snoozer_app \
|
||||
--network lwn-sub-snoozer_network \
|
||||
--name rssify_app \
|
||||
--network rssify_network \
|
||||
-e POSTGRES_HOST={{POSTGRES_HOST}} \
|
||||
--env-file .env \
|
||||
--volume /tmp/lwn_sub:/rss \
|
||||
lwn-sub-snoozer_app:latest
|
||||
--volume {{TEMP_DIR}}:/rss \
|
||||
rssify_app:latest
|
||||
|
|
|
|||
58
src/bin/cli.rs
Executable file
58
src/bin/cli.rs
Executable file
|
|
@ -0,0 +1,58 @@
|
|||
use clap::Parser;
|
||||
use rssify::{kev::KEV, lwn::LWN, source::Source};
|
||||
use std::env;
|
||||
use tokio::runtime::Runtime;
|
||||
use tokio_postgres;
|
||||
|
||||
#[derive(Parser)]
|
||||
struct Cli {
|
||||
path: std::path::PathBuf,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let rt = Runtime::new().unwrap();
|
||||
|
||||
let args = Cli::parse();
|
||||
let db_connection_string = &format!(
|
||||
"host={} dbname={} user={} password={}",
|
||||
env::var("POSTGRES_HOST").unwrap_or(String::from("localhost")),
|
||||
env::var("POSTGRES_USER").unwrap_or(String::from("root")),
|
||||
env::var("POSTGRES_USER").unwrap_or(String::from("root")),
|
||||
env::var("POSTGRES_PASSWORD").unwrap_or(String::from("root"))
|
||||
);
|
||||
|
||||
println!("Connection string: {}", db_connection_string);
|
||||
|
||||
rt.block_on(async {
|
||||
// Connect to the database.
|
||||
if let Ok((client, connection)) =
|
||||
tokio_postgres::connect(db_connection_string, tokio_postgres::NoTls).await
|
||||
{
|
||||
println!("Working...");
|
||||
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
eprintln!("[x] Connection db error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
println!("Connected");
|
||||
|
||||
if let Ok(_) = KEV::fetch(&client).await {
|
||||
println!("KEV fetched successfully");
|
||||
if let Ok(_) = KEV::publish(&client, args.path.clone()).await {
|
||||
println!("KEV updated successfully");
|
||||
}
|
||||
}
|
||||
|
||||
if let Ok(_) = LWN::fetch(&client).await {
|
||||
println!("LWN fetched successfully");
|
||||
if let Ok(_) = LWN::publish(&client, args.path.clone()).await {
|
||||
println!("LWN updated successfully");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
eprintln!("[x] Could not connect with db");
|
||||
}
|
||||
});
|
||||
}
|
||||
80
src/error.rs
Normal file
80
src/error.rs
Normal file
|
|
@ -0,0 +1,80 @@
|
|||
use std::str::Utf8Error;
|
||||
|
||||
/// Category of a crate-specific failure.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum RssifyErrorKind {
    /// The KEV catalogue could not be interpreted.
    InvalidKevCatalogue,
    /// A generated XML feed could not be written to disk.
    SaveXMLError,
    /// A date string could not be parsed as a `NaiveDate`.
    InvalidNaiveDate,
    /// Any failure that fits none of the other kinds.
    Unknown,
}

/// A crate-specific error: a kind plus a human-readable message.
#[derive(Debug)]
pub struct RssifyError {
    /// Kind of error
    kind: RssifyErrorKind,
    /// Associated message of the context
    pub message: String,
}

impl RssifyError {
    /// Creates an error of the given `kind`, copying `message` into it.
    pub fn new(kind: RssifyErrorKind, message: &str) -> Self {
        RssifyError {
            kind,
            message: String::from(message),
        }
    }

    /// Returns the kind this error was created with.
    pub fn kind(&self) -> RssifyErrorKind {
        self.kind
    }
}

/// Displays the context message, so the error can be printed with `{}`.
impl std::fmt::Display for RssifyError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

/// Makes the type usable wherever a standard error is expected
/// (`Box<dyn std::error::Error>`, error-reporting helpers, ...).
impl std::error::Error for RssifyError {}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// Rssify error
|
||||
RssifyError(RssifyError),
|
||||
Utf8Error(Utf8Error),
|
||||
ReqwestError(reqwest::Error),
|
||||
RssError(rss::Error),
|
||||
}
|
||||
|
||||
impl Error {
|
||||
pub fn new(kind: RssifyErrorKind, message: &str) -> Self {
|
||||
Error::RssifyError(RssifyError::new(kind, message))
|
||||
}
|
||||
|
||||
pub fn invalid_kev_catalogue() -> Self {
|
||||
Error::RssifyError(RssifyError::new(
|
||||
RssifyErrorKind::InvalidKevCatalogue,
|
||||
"[KEV] Invalid KEV catalogue: failed to parse the JSON entry",
|
||||
))
|
||||
}
|
||||
|
||||
pub fn save_xml_error(path: &str) -> Self {
|
||||
Error::RssifyError(RssifyError::new(
|
||||
RssifyErrorKind::SaveXMLError,
|
||||
&format!("Failed to safe XML feed to {}", path),
|
||||
))
|
||||
}
|
||||
|
||||
pub fn invalid_naive_date(date: &str) -> Self {
|
||||
Error::RssifyError(RssifyError::new(
|
||||
RssifyErrorKind::InvalidNaiveDate,
|
||||
&format!("Failed to parse NaiveDate {}", date),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<reqwest::Error> for Error {
|
||||
fn from(e: reqwest::Error) -> Self {
|
||||
Self::ReqwestError(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<rss::Error> for Error {
|
||||
fn from(e: rss::Error) -> Self {
|
||||
Self::RssError(e)
|
||||
}
|
||||
}
|
||||
|
||||
pub type RssifyResult<T> = Result<T, Error>;
|
||||
127
src/kev.rs
Executable file
127
src/kev.rs
Executable file
|
|
@ -0,0 +1,127 @@
|
|||
use reqwest::get;
|
||||
use rss::{Guid, Item};
|
||||
use std::path::PathBuf;
|
||||
use tokio_postgres;
|
||||
|
||||
use crate::{
|
||||
error::{Error, RssifyResult},
|
||||
source::{self, Source},
|
||||
};
|
||||
|
||||
pub struct KEV;
|
||||
const URL: &str =
|
||||
"https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json";
|
||||
|
||||
impl Source for KEV {
|
||||
async fn fetch(client: &tokio_postgres::Client) -> RssifyResult<()> {
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"CREATE TABLE IF NOT EXISTS kev (
|
||||
title TEXT,
|
||||
cveID TEXT,
|
||||
description TEXT,
|
||||
dateAdded TEXT
|
||||
)",
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("[x] KEV table creation error: {}", e);
|
||||
}
|
||||
|
||||
let text = get(URL).await?.text().await?;
|
||||
|
||||
if let Ok(json) = json::parse(&text) {
|
||||
if let Ok(last_db_entry) = client
|
||||
.query(
|
||||
"SELECT dateAdded, cveID FROM kev ORDER BY dateAdded desc LIMIT 1",
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
{
|
||||
let (last_db_cve_id, last_db_date_added): (&str, &str) = match last_db_entry.first()
|
||||
{
|
||||
Some(row) => (
|
||||
row.try_get("cveID")
|
||||
.map_err(|_| Error::invalid_kev_catalogue())?,
|
||||
row.try_get("dateAdded")
|
||||
.map_err(|_| Error::invalid_kev_catalogue())?,
|
||||
),
|
||||
_ => ("", ""),
|
||||
};
|
||||
|
||||
println!(
|
||||
"[DEBUG] Last db entry: {:?} - {:?}",
|
||||
last_db_cve_id, last_db_date_added
|
||||
);
|
||||
|
||||
let new_entries = json["vulnerabilities"]
|
||||
.members()
|
||||
.take_while(|entry| entry["cveID"] != last_db_cve_id);
|
||||
|
||||
for entry in new_entries {
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"INSERT INTO kev (
|
||||
cveID,
|
||||
title,
|
||||
dateAdded,
|
||||
description
|
||||
) VALUES (
|
||||
$1, $2, $3, $4)",
|
||||
&[
|
||||
&entry["cveID"].as_str(),
|
||||
&format!("{} - {}", entry["cveID"], entry["vulnerabilityName"]),
|
||||
&entry["dateAdded"].as_str(),
|
||||
&format!(
|
||||
"Description: {}\nRequired actions: {}\nNotes: {}",
|
||||
entry["shortDescription"],
|
||||
entry["requiredAction"],
|
||||
entry["notes"]
|
||||
),
|
||||
],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("[x] Error insert: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn publish(client: &tokio_postgres::Client, mut path: PathBuf) -> RssifyResult<()> {
|
||||
let mut items: Vec<Item> = Vec::new();
|
||||
if let Ok(entries) = client
|
||||
.query("SELECT * FROM kev ORDER BY dateAdded desc LIMIT 15", &[])
|
||||
.await
|
||||
{
|
||||
for entry in entries {
|
||||
let mut guid = Guid::default();
|
||||
guid.set_value(entry.get::<_, &str>("cveID"));
|
||||
|
||||
items.push(
|
||||
rss::ItemBuilder::default()
|
||||
.title(Some(entry.get("title")))
|
||||
.link(Some(String::from(URL)))
|
||||
.guid(Some(guid))
|
||||
.pub_date(Some(entry.get("dateAdded")))
|
||||
.description(Some(entry.get("description")))
|
||||
.build(),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let channel = rss::ChannelBuilder::default()
|
||||
.title("CISA KEV")
|
||||
.link(URL)
|
||||
.description("CISA Catalog of Known Exploited Vulnerabilities")
|
||||
.items(items)
|
||||
.build();
|
||||
|
||||
path.push("kev.xml");
|
||||
source::save_xml(&channel.to_string(), &path)
|
||||
}
|
||||
}
|
||||
4
src/lib.rs
Normal file
4
src/lib.rs
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
pub mod error;
|
||||
pub mod kev;
|
||||
pub mod lwn;
|
||||
pub mod source;
|
||||
158
src/lwn.rs
Executable file
158
src/lwn.rs
Executable file
|
|
@ -0,0 +1,158 @@
|
|||
use chrono::{NaiveDate, NaiveDateTime, TimeZone, prelude::Local};
|
||||
use regex::Regex;
|
||||
use reqwest::get;
|
||||
use rss::Item;
|
||||
use scraper::{Html, Selector};
|
||||
use std::path::PathBuf;
|
||||
use tokio_postgres;
|
||||
|
||||
use crate::error::{Error, RssifyResult};
|
||||
use crate::source::{self, Source};
|
||||
|
||||
pub struct LWN;
|
||||
|
||||
impl LWN {
|
||||
async fn fetch_release_date(url: &str) -> RssifyResult<Option<NaiveDateTime>> {
|
||||
let response = get(url).await?.text().await?;
|
||||
|
||||
if let Some(article_text) = Html::parse_document(&response)
|
||||
.select(&Selector::parse("div.ArticleText").unwrap())
|
||||
.next()
|
||||
{
|
||||
if let Some(yes) = article_text.select(&Selector::parse("p").unwrap()).last() {
|
||||
let re = Regex::new(
|
||||
r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
|
||||
).unwrap();
|
||||
if let Some(cap) = re.captures(&yes.inner_html()) {
|
||||
if let Some(date) = cap.get(1) {
|
||||
return match NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y") {
|
||||
Ok(date) => Ok(Some(date.and_hms_opt(0, 0, 0).unwrap())),
|
||||
Err(_) => Err(Error::invalid_naive_date(date.as_str())),
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
async fn fetch_paid_article_urls() -> RssifyResult<Vec<rss::Item>> {
|
||||
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
|
||||
let channel = rss::Channel::read_from(&response[..])?;
|
||||
|
||||
Ok(channel
|
||||
.items()
|
||||
.iter()
|
||||
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
|
||||
.map(|i| i.clone())
|
||||
.collect::<Vec<rss::Item>>())
|
||||
}
|
||||
}
|
||||
|
||||
impl Source for LWN {
|
||||
async fn fetch(client: &tokio_postgres::Client) -> RssifyResult<()> {
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"CREATE TABLE IF NOT EXISTS lwn (
|
||||
link TEXT,
|
||||
title TEXT,
|
||||
description TEXT,
|
||||
pub_date TEXT,
|
||||
release_date TEXT
|
||||
)",
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("[x] LWN table creation error: {}", e);
|
||||
}
|
||||
|
||||
// Get new [$] articles
|
||||
if let Ok(items) = LWN::fetch_paid_article_urls().await {
|
||||
for item in items {
|
||||
if let Some(link) = item.link() {
|
||||
match client
|
||||
.query_opt("SELECT release_date FROM lwn WHERE link = $1", &[&link])
|
||||
.await
|
||||
{
|
||||
Ok(None) => {
|
||||
if let Ok(Some(date)) = LWN::fetch_release_date(&link).await {
|
||||
if let (Some(title), Some(description), Some(pub_date)) =
|
||||
(item.title(), item.description(), item.pub_date())
|
||||
{
|
||||
println!("Adding new article to db: {}", link);
|
||||
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"INSERT INTO lwn (
|
||||
link,
|
||||
title,
|
||||
description,
|
||||
pub_date,
|
||||
release_date
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5)",
|
||||
&[
|
||||
&link,
|
||||
&title,
|
||||
&description,
|
||||
&pub_date,
|
||||
&date.to_string(),
|
||||
],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("[x] Error insert: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn publish(client: &tokio_postgres::Client, mut path: PathBuf) -> RssifyResult<()> {
|
||||
let mut items: Vec<Item> = Vec::new();
|
||||
|
||||
if let Ok(saved_articles) = client.query("SELECT * FROM lwn", &[]).await {
|
||||
for row in saved_articles {
|
||||
let date: &str = row.get("release_date");
|
||||
if let Ok(date) = NaiveDateTime::parse_from_str(date, "%Y-%m-%d %H:%M:%S") {
|
||||
if Local.from_local_datetime(&date).unwrap() < Local::now() {
|
||||
let link: String = row.get("title");
|
||||
let guid = rss::GuidBuilder::default()
|
||||
.value(link.clone())
|
||||
.permalink(true)
|
||||
.build();
|
||||
|
||||
items.push(
|
||||
rss::ItemBuilder::default()
|
||||
.title(Some(row.get("title")))
|
||||
.link(Some(link))
|
||||
.guid(Some(guid))
|
||||
.pub_date(Some(Local::now().to_rfc2822()))
|
||||
.description(Some(row.get("description")))
|
||||
.build(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let channel = rss::ChannelBuilder::default()
|
||||
.title("[$] lwn.net")
|
||||
.link("https://dawl.fr/lwn.net/rss.xml")
|
||||
.description("RSS flux of lwn.net paid articles that are freely released.")
|
||||
.items(items)
|
||||
.build();
|
||||
|
||||
path.push("lwn.xml");
|
||||
source::save_xml(&channel.to_string(), &path)
|
||||
}
|
||||
}
|
||||
198
src/main.rs
198
src/main.rs
|
|
@ -1,198 +0,0 @@
|
|||
use chrono::{NaiveDate, NaiveDateTime, TimeZone, prelude::Local};
|
||||
use clap::Parser;
|
||||
use regex::Regex;
|
||||
use reqwest::get;
|
||||
use scraper::{Html, Selector};
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::io::Write;
|
||||
use tokio::runtime::Runtime;
|
||||
use tokio_postgres;
|
||||
|
||||
#[derive(Parser)]
|
||||
struct Cli {
|
||||
path: std::path::PathBuf,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let rt = Runtime::new().unwrap();
|
||||
|
||||
let args = Cli::parse();
|
||||
let db_connection_string = &format!(
|
||||
"host={} dbname={} user={} password={}",
|
||||
env::var("POSTGRES_HOST").unwrap_or(String::from("localhost")),
|
||||
env::var("POSTGRES_USER").unwrap_or(String::from("dev")),
|
||||
env::var("POSTGRES_USER").unwrap_or(String::from("dev")),
|
||||
env::var("POSTGRES_PASSWORD").unwrap_or(String::from("dev"))
|
||||
);
|
||||
|
||||
println!("Connection string: {}", db_connection_string);
|
||||
|
||||
rt.block_on(async {
|
||||
// Connect to the database.
|
||||
if let Ok((client, connection)) =
|
||||
tokio_postgres::connect(db_connection_string, tokio_postgres::NoTls).await
|
||||
{
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
eprintln!("[x] Connection db error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"CREATE TABLE IF NOT EXISTS articles (
|
||||
link TEXT,
|
||||
title TEXT,
|
||||
description TEXT,
|
||||
pub_date TEXT,
|
||||
release_date TEXT
|
||||
)",
|
||||
&[],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("[x] Table creation error: {}", e);
|
||||
}
|
||||
|
||||
// Get new [$] articles
|
||||
if let Ok(items) = fetch_paid_article_urls().await {
|
||||
for item in items {
|
||||
if let Some(link) = item.link() {
|
||||
match client
|
||||
.query_opt(
|
||||
"SELECT release_date FROM articles WHERE link = $1",
|
||||
&[&link],
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(None) => {
|
||||
if let Ok(Some(date)) = fetch_release_date(&link).await {
|
||||
if let (Some(title), Some(description), Some(pub_date)) =
|
||||
(item.title(), item.description(), item.pub_date())
|
||||
{
|
||||
println!("Adding new article to db: {}", link);
|
||||
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"INSERT INTO articles (
|
||||
link,
|
||||
title,
|
||||
description,
|
||||
pub_date,
|
||||
release_date
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5)",
|
||||
&[
|
||||
&link,
|
||||
&title,
|
||||
&description,
|
||||
&pub_date,
|
||||
&date.to_string(),
|
||||
],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("[x] Error insert: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: How to manage the RSS xml file
|
||||
let mut channel = match File::open(&args.path) {
|
||||
Ok(file) => rss::Channel::read_from(BufReader::new(file)).unwrap(),
|
||||
_ => rss::ChannelBuilder::default()
|
||||
.title("[$] lwn.net")
|
||||
.link("https://dawl.fr/lwn.net/rss.xml")
|
||||
.description("RSS flux of lwn.net paid articles that are freely released.")
|
||||
.items(vec![])
|
||||
.build(),
|
||||
};
|
||||
let mut items = channel.clone().into_items();
|
||||
|
||||
if let Ok(saved_articles) = client.query("SELECT * FROM articles", &[]).await {
|
||||
for row in saved_articles {
|
||||
let date: &str = row.get("release_date");
|
||||
if let Ok(date) = NaiveDateTime::parse_from_str(date, "%Y-%m-%d %H:%M:%S") {
|
||||
if Local.from_local_datetime(&date).unwrap() < Local::now() {
|
||||
let link: String = row.get("title");
|
||||
let guid = rss::GuidBuilder::default()
|
||||
.value(link.clone())
|
||||
.permalink(true)
|
||||
.build();
|
||||
|
||||
items.push(
|
||||
rss::ItemBuilder::default()
|
||||
.title(Some(row.get("title")))
|
||||
.link(Some(link))
|
||||
.guid(Some(guid))
|
||||
.pub_date(Some(Local::now().to_rfc2822()))
|
||||
.description(Some(row.get("description")))
|
||||
.build(),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
channel.set_items(items);
|
||||
if let Err(e) = save_xml(&channel.to_string(), args.path) {
|
||||
eprintln!("[x] Failed to save xml: {}", e);
|
||||
}
|
||||
} else {
|
||||
eprintln!("[x] Failed to connect to the db");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
fn save_xml(rss_string: &str, path: std::path::PathBuf) -> std::io::Result<()> {
|
||||
let mut file = File::create(path)?;
|
||||
file.write_all(rss_string.as_bytes())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn fetch_release_date(url: &str) -> Result<Option<NaiveDateTime>, Box<dyn Error>> {
|
||||
let response = get(url).await?.text().await?;
|
||||
|
||||
if let Some(article_text) = Html::parse_document(&response)
|
||||
.select(&Selector::parse("div.ArticleText")?)
|
||||
.next()
|
||||
{
|
||||
if let Some(yes) = article_text.select(&Selector::parse("p")?).last() {
|
||||
let re = Regex::new(
|
||||
r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
|
||||
)?;
|
||||
if let Some(cap) = re.captures(&yes.inner_html()) {
|
||||
if let Some(date) = cap.get(1) {
|
||||
return Ok(Some(
|
||||
NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?
|
||||
.and_hms_opt(0, 0, 0)
|
||||
.unwrap(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
async fn fetch_paid_article_urls() -> Result<Vec<rss::Item>, Box<dyn Error>> {
|
||||
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
|
||||
let channel = rss::Channel::read_from(&response[..])?;
|
||||
|
||||
Ok(channel
|
||||
.items()
|
||||
.iter()
|
||||
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
|
||||
.map(|i| i.clone())
|
||||
.collect::<Vec<rss::Item>>())
|
||||
}
|
||||
22
src/source.rs
Normal file
22
src/source.rs
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
use crate::error::{Error, RssifyResult};
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
|
||||
pub trait Source {
|
||||
fn fetch(client: &tokio_postgres::Client) -> impl Future<Output = RssifyResult<()>>;
|
||||
fn publish(
|
||||
client: &tokio_postgres::Client,
|
||||
path: PathBuf,
|
||||
) -> impl Future<Output = RssifyResult<()>>;
|
||||
}
|
||||
|
||||
pub fn save_xml(rss_string: &str, path: &std::path::PathBuf) -> RssifyResult<()> {
|
||||
if let Ok(mut file) = File::create(path) {
|
||||
if file.write_all(rss_string.as_bytes()).is_ok() {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
Err(Error::save_xml_error(path.to_str().unwrap()))
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue