This commit is contained in:
dolphinau 2025-08-27 16:45:08 +02:00
parent 23b263a50b
commit 6f85efcd26
No known key found for this signature in database
13 changed files with 3580 additions and 219 deletions

3082
Cargo.lock generated Executable file

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,17 @@
[package]
name = "lwn-sub-snoozer"
name = "rssify"
version = "0.1.0"
edition = "2024"
[lib]
name = "rssify"
path = "src/lib.rs"
[[bin]]
name = "rssify-cli"
path = "src/bin/cli.rs"
# required-features = ["cli"]
[dependencies]
tokio = { version = "1.46.0", features = ["full"] }
futures = "0.3.31"
@ -14,3 +23,4 @@ rss = "2.0.12"
mini-redis = "0.4.1"
tokio-postgres = "0.7.13"
clap = { version = "4.5.42", features = ["derive"] }
json = "0.12.4"

View file

@ -4,4 +4,4 @@ WORKDIR /src
COPY . .
RUN cargo build --release
CMD ["./target/release/lwn-sub-snoozer", "/rss/lwn-sub.xml"]
CMD ["./target/release/rssify-cli", "/rss/"]

View file

@ -1,3 +1,8 @@
# rssify - Transform some websites I like into RSS feeds
- lwn paid articles
- CISA KEV release
## Usage
Use the `justfile` to run commands:

View file

@ -3,17 +3,12 @@
inputs = {
flake-utils.url = "github:numtide/flake-utils";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
fenix = {
url = "github:nix-community/fenix/monthly";
inputs.nixpkgs.follows = "nixpkgs";
};
};
outputs = {
self,
nixpkgs,
flake-utils,
fenix,
}:
flake-utils.lib.eachDefaultSystem (system: let
pkgs = nixpkgs.legacyPackages.${system};
@ -27,12 +22,19 @@
packages = [
pkgs.pkg-config
pkgs.openssl
pkgs.nixpkgs-fmt
pkgs.nil
pkgs.postgresql
# Nix
pkgs.nixpkgs-fmt
pkgs.nil
pkgs.nixd
pkgs.alejandra
# Rust
fenix.packages.${system}.default.toolchain
pkgs.rustfmt
pkgs.rustc
pkgs.cargo
pkgs.rust-analyzer
];
};
});

View file

@ -1,30 +1,41 @@
set dotenv-load
POSTGRES_HOST := 'lwn-sub-snoozer_db'
POSTGRES_HOST := 'rssify_db'
TEMP_DIR := `mktemp -d`
clean:
rm -rf target
podman network rm lwn-sub-snoozer_network || true
podman stop rssify_db || true
podman network rm rssify_network || true
init_db:
podman network create lwn-sub-snoozer_network || true
podman network create rssify_network || true
podman run --rm -d --replace \
--name {{POSTGRES_HOST}} \
--network=lwn-sub-snoozer_network \
--network=rssify_network \
-p 5432:5432 \
--env-file .env \
docker.io/postgres:alpine
init_app:
podman build --tag lwn-sub-snoozer_app:latest .
build:
podman build --tag rssify_app:latest .
init: init_db init_app
init: init_db build
attach:
podman run -it --rm \
--name rssify_app \
--network rssify_network \
-e POSTGRES_HOST={{POSTGRES_HOST}} \
--env-file .env \
--volume {{TEMP_DIR}}:/rss \
rssify_app:latest sh
run:
podman run --rm \
--name lwn-sub-snoozer_app \
--network lwn-sub-snoozer_network \
--name rssify_app \
--network rssify_network \
-e POSTGRES_HOST={{POSTGRES_HOST}} \
--env-file .env \
--volume /tmp/lwn_sub:/rss \
lwn-sub-snoozer_app:latest
--volume {{TEMP_DIR}}:/rss \
rssify_app:latest

58
src/bin/cli.rs Executable file
View file

@ -0,0 +1,58 @@
use clap::Parser;
use rssify::{kev::KEV, lwn::LWN, source::Source};
use std::env;
use tokio::runtime::Runtime;
use tokio_postgres;
// Command-line arguments of the `rssify-cli` binary.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the program's output.
#[derive(Parser)]
struct Cli {
// Directory handed to each source's `publish`; the sources append their own
// file name (`kev.xml`, `lwn.xml`) to it.
path: std::path::PathBuf,
}
/// Entry point: connects to PostgreSQL, then fetches and publishes every source.
///
/// Connection settings are read from `POSTGRES_HOST`, `POSTGRES_USER` and
/// `POSTGRES_PASSWORD`, falling back to `localhost` / `root` / `root`.
/// Failures are reported on stderr; a failing source does not prevent the
/// other one from running.
///
/// # Panics
/// Panics if the tokio runtime cannot be created.
fn main() {
    let rt = Runtime::new().unwrap();
    let args = Cli::parse();

    let host = env::var("POSTGRES_HOST").unwrap_or_else(|_| String::from("localhost"));
    let user = env::var("POSTGRES_USER").unwrap_or_else(|_| String::from("root"));
    let password = env::var("POSTGRES_PASSWORD").unwrap_or_else(|_| String::from("root"));

    // The database name is taken from POSTGRES_USER, exactly as before.
    // NOTE(review): presumably relies on the database being named after the user - confirm.
    let db_connection_string =
        format!("host={host} dbname={user} user={user} password={password}");
    // Never write the password to the logs.
    println!("Connection string: host={host} dbname={user} user={user} password=***");

    rt.block_on(async {
        // Connect to the database.
        let (client, connection) =
            match tokio_postgres::connect(&db_connection_string, tokio_postgres::NoTls).await {
                Ok(pair) => pair,
                Err(e) => {
                    eprintln!("[x] Could not connect with db: {}", e);
                    return;
                }
            };

        println!("Working...");
        // The connection object drives the actual socket I/O and must be polled
        // for the client to make progress.
        tokio::spawn(async move {
            if let Err(e) = connection.await {
                eprintln!("[x] Connection db error: {}", e);
            }
        });
        println!("Connected");

        match KEV::fetch(&client).await {
            Ok(()) => {
                println!("KEV fetched successfully");
                match KEV::publish(&client, args.path.clone()).await {
                    Ok(()) => println!("KEV updated successfully"),
                    Err(e) => eprintln!("[x] KEV publish error: {:?}", e),
                }
            }
            Err(e) => eprintln!("[x] KEV fetch error: {:?}", e),
        }

        match LWN::fetch(&client).await {
            Ok(()) => {
                println!("LWN fetched successfully");
                match LWN::publish(&client, args.path.clone()).await {
                    Ok(()) => println!("LWN updated successfully"),
                    Err(e) => eprintln!("[x] LWN publish error: {:?}", e),
                }
            }
            Err(e) => eprintln!("[x] LWN fetch error: {:?}", e),
        }
    });
}

80
src/error.rs Normal file
View file

@ -0,0 +1,80 @@
use std::str::Utf8Error;
/// Category of a failure raised by rssify itself, as opposed to an error
/// forwarded from a dependency.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RssifyErrorKind {
    /// The KEV catalogue could not be read as expected.
    InvalidKevCatalogue,
    /// The generated XML feed could not be written.
    SaveXMLError,
    /// A date string could not be parsed into a `NaiveDate`.
    InvalidNaiveDate,
    /// Catch-all kind; no constructor in this file produces it.
    Unknown,
}
/// An rssify-specific error: a machine-readable kind plus a human-readable message.
#[derive(Debug)]
pub struct RssifyError {
    /// Kind of error
    kind: RssifyErrorKind,
    /// Associated message of the context
    pub message: String,
}

impl RssifyError {
    /// Builds an error of the given `kind`, copying `message` into an owned string.
    pub fn new(kind: RssifyErrorKind, message: &str) -> Self {
        let message = message.to_owned();
        Self { kind, message }
    }

    /// Returns the kind this error was created with.
    pub fn kind(&self) -> RssifyErrorKind {
        self.kind
    }
}
/// Every error the crate can return: either one raised by rssify itself or
/// one forwarded from a dependency.
#[derive(Debug)]
pub enum Error {
    /// Rssify error
    RssifyError(RssifyError),
    /// Invalid UTF-8 encountered while decoding text
    Utf8Error(Utf8Error),
    /// HTTP failure reported by `reqwest`
    ReqwestError(reqwest::Error),
    /// RSS parsing failure reported by the `rss` crate
    RssError(rss::Error),
}

// Human-readable rendering so callers can print the error with `{}`.
impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::RssifyError(e) => f.write_str(&e.message),
            Error::Utf8Error(e) => write!(f, "{}", e),
            Error::ReqwestError(e) => write!(f, "{}", e),
            Error::RssError(e) => write!(f, "{}", e),
        }
    }
}

// Makes the type usable with `Box<dyn std::error::Error>` and exposes the
// wrapped dependency error as the source.
impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::RssifyError(_) => None,
            Error::Utf8Error(e) => Some(e),
            Error::ReqwestError(e) => Some(e),
            Error::RssError(e) => Some(e),
        }
    }
}
impl Error {
    /// Wraps a new [`RssifyError`] of the given `kind` carrying `message`.
    pub fn new(kind: RssifyErrorKind, message: &str) -> Self {
        Error::RssifyError(RssifyError::new(kind, message))
    }

    /// Error for a KEV catalogue that could not be read as expected.
    pub fn invalid_kev_catalogue() -> Self {
        Self::new(
            RssifyErrorKind::InvalidKevCatalogue,
            "[KEV] Invalid KEV catalogue: failed to parse the JSON entry",
        )
    }

    /// Error for an XML feed that could not be written to `path`.
    pub fn save_xml_error(path: &str) -> Self {
        // The message previously read "Failed to safe XML feed".
        Self::new(
            RssifyErrorKind::SaveXMLError,
            &format!("Failed to save XML feed to {}", path),
        )
    }

    /// Error for a `date` string that could not be parsed into a `NaiveDate`.
    pub fn invalid_naive_date(date: &str) -> Self {
        Self::new(
            RssifyErrorKind::InvalidNaiveDate,
            &format!("Failed to parse NaiveDate {}", date),
        )
    }
}
impl From<reqwest::Error> for Error {
fn from(e: reqwest::Error) -> Self {
Self::ReqwestError(e)
}
}
impl From<rss::Error> for Error {
fn from(e: rss::Error) -> Self {
Self::RssError(e)
}
}
/// Result alias whose error type is the crate's [`Error`].
pub type RssifyResult<T> = Result<T, Error>;

127
src/kev.rs Executable file
View file

@ -0,0 +1,127 @@
use reqwest::get;
use rss::{Guid, Item};
use std::path::PathBuf;
use tokio_postgres;
use crate::{
error::{Error, RssifyResult},
source::{self, Source},
};
/// Unit type carrying the [`Source`] implementation for the CISA KEV catalogue.
pub struct KEV;
/// Address of the KEV catalogue JSON document; also used as the link of the
/// generated channel and of every item.
const URL: &str =
"https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json";
impl Source for KEV {
async fn fetch(client: &tokio_postgres::Client) -> RssifyResult<()> {
if let Err(e) = client
.query(
"CREATE TABLE IF NOT EXISTS kev (
title TEXT,
cveID TEXT,
description TEXT,
dateAdded TEXT
)",
&[],
)
.await
{
eprintln!("[x] KEV table creation error: {}", e);
}
let text = get(URL).await?.text().await?;
if let Ok(json) = json::parse(&text) {
if let Ok(last_db_entry) = client
.query(
"SELECT dateAdded, cveID FROM kev ORDER BY dateAdded desc LIMIT 1",
&[],
)
.await
{
let (last_db_cve_id, last_db_date_added): (&str, &str) = match last_db_entry.first()
{
Some(row) => (
row.try_get("cveID")
.map_err(|_| Error::invalid_kev_catalogue())?,
row.try_get("dateAdded")
.map_err(|_| Error::invalid_kev_catalogue())?,
),
_ => ("", ""),
};
println!(
"[DEBUG] Last db entry: {:?} - {:?}",
last_db_cve_id, last_db_date_added
);
let new_entries = json["vulnerabilities"]
.members()
.take_while(|entry| entry["cveID"] != last_db_cve_id);
for entry in new_entries {
if let Err(e) = client
.query(
"INSERT INTO kev (
cveID,
title,
dateAdded,
description
) VALUES (
$1, $2, $3, $4)",
&[
&entry["cveID"].as_str(),
&format!("{} - {}", entry["cveID"], entry["vulnerabilityName"]),
&entry["dateAdded"].as_str(),
&format!(
"Description: {}\nRequired actions: {}\nNotes: {}",
entry["shortDescription"],
entry["requiredAction"],
entry["notes"]
),
],
)
.await
{
eprintln!("[x] Error insert: {}", e);
}
}
}
}
Ok(())
}
async fn publish(client: &tokio_postgres::Client, mut path: PathBuf) -> RssifyResult<()> {
let mut items: Vec<Item> = Vec::new();
if let Ok(entries) = client
.query("SELECT * FROM kev ORDER BY dateAdded desc LIMIT 15", &[])
.await
{
for entry in entries {
let mut guid = Guid::default();
guid.set_value(entry.get::<_, &str>("cveID"));
items.push(
rss::ItemBuilder::default()
.title(Some(entry.get("title")))
.link(Some(String::from(URL)))
.guid(Some(guid))
.pub_date(Some(entry.get("dateAdded")))
.description(Some(entry.get("description")))
.build(),
);
}
};
let channel = rss::ChannelBuilder::default()
.title("CISA KEV")
.link(URL)
.description("CISA Catalog of Known Exploited Vulnerabilities")
.items(items)
.build();
path.push("kev.xml");
source::save_xml(&channel.to_string(), &path)
}
}

4
src/lib.rs Normal file
View file

@ -0,0 +1,4 @@
pub mod error;
pub mod kev;
pub mod lwn;
pub mod source;

158
src/lwn.rs Executable file
View file

@ -0,0 +1,158 @@
use chrono::{NaiveDate, NaiveDateTime, TimeZone, prelude::Local};
use regex::Regex;
use reqwest::get;
use rss::Item;
use scraper::{Html, Selector};
use std::path::PathBuf;
use tokio_postgres;
use crate::error::{Error, RssifyResult};
use crate::source::{self, Source};
/// Unit type carrying the [`Source`] implementation for lwn.net paid (`[$]`) articles.
pub struct LWN;
impl LWN {
async fn fetch_release_date(url: &str) -> RssifyResult<Option<NaiveDateTime>> {
let response = get(url).await?.text().await?;
if let Some(article_text) = Html::parse_document(&response)
.select(&Selector::parse("div.ArticleText").unwrap())
.next()
{
if let Some(yes) = article_text.select(&Selector::parse("p").unwrap()).last() {
let re = Regex::new(
r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
).unwrap();
if let Some(cap) = re.captures(&yes.inner_html()) {
if let Some(date) = cap.get(1) {
return match NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y") {
Ok(date) => Ok(Some(date.and_hms_opt(0, 0, 0).unwrap())),
Err(_) => Err(Error::invalid_naive_date(date.as_str())),
};
}
}
}
}
Ok(None)
}
async fn fetch_paid_article_urls() -> RssifyResult<Vec<rss::Item>> {
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
let channel = rss::Channel::read_from(&response[..])?;
Ok(channel
.items()
.iter()
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
.map(|i| i.clone())
.collect::<Vec<rss::Item>>())
}
}
impl Source for LWN {
    /// Records newly announced `[$]` articles, together with their free
    /// release date, in the `lwn` table (created on demand).
    ///
    /// An article is stored only when it is not in the table yet, a release
    /// date could be extracted from its page, and the feed item has a title,
    /// a description and a publication date. Every step is best effort:
    /// failures are logged and the function still returns `Ok(())`.
    async fn fetch(client: &tokio_postgres::Client) -> RssifyResult<()> {
        if let Err(e) = client
            .query(
                "CREATE TABLE IF NOT EXISTS lwn (
                    link TEXT,
                    title TEXT,
                    description TEXT,
                    pub_date TEXT,
                    release_date TEXT
                )",
                &[],
            )
            .await
        {
            eprintln!("[x] LWN table creation error: {}", e);
        }

        // Get new [$] articles
        let items = match LWN::fetch_paid_article_urls().await {
            Ok(items) => items,
            Err(e) => {
                eprintln!("[x] LWN headlines fetch error: {:?}", e);
                return Ok(());
            }
        };

        for item in items {
            let Some(link) = item.link() else { continue };

            match client
                .query_opt("SELECT release_date FROM lwn WHERE link = $1", &[&link])
                .await
            {
                // Unknown article: go on and store it.
                Ok(None) => {}
                // Already stored.
                Ok(Some(_)) => continue,
                Err(e) => {
                    eprintln!("[x] LWN lookup error for {}: {}", link, e);
                    continue;
                }
            }

            let date = match LWN::fetch_release_date(link).await {
                Ok(Some(date)) => date,
                Ok(None) => continue,
                Err(e) => {
                    eprintln!("[x] LWN release date error for {}: {:?}", link, e);
                    continue;
                }
            };

            let (Some(title), Some(description), Some(pub_date)) =
                (item.title(), item.description(), item.pub_date())
            else {
                continue;
            };

            println!("Adding new article to db: {}", link);
            if let Err(e) = client
                .query(
                    "INSERT INTO lwn (
                        link,
                        title,
                        description,
                        pub_date,
                        release_date
                    ) VALUES (
                        $1, $2, $3, $4, $5)",
                    &[
                        &link,
                        &title,
                        &description,
                        &pub_date,
                        &date.to_string(),
                    ],
                )
                .await
            {
                eprintln!("[x] Error insert: {}", e);
            }
        }

        Ok(())
    }

    /// Writes every stored article whose release date has passed to
    /// `<path>/lwn.xml`.
    ///
    /// Rows whose release date cannot be parsed, or does not map to a valid
    /// local time, are skipped. A failing SELECT is logged and results in an
    /// empty feed being written.
    ///
    /// # Errors
    /// Returns the error produced by `source::save_xml` when the file cannot
    /// be written.
    async fn publish(client: &tokio_postgres::Client, mut path: PathBuf) -> RssifyResult<()> {
        let mut items: Vec<Item> = Vec::new();
        let now = Local::now();

        match client.query("SELECT * FROM lwn", &[]).await {
            Ok(saved_articles) => {
                for row in saved_articles {
                    let date: &str = row.get("release_date");
                    let Ok(date) = NaiveDateTime::parse_from_str(date, "%Y-%m-%d %H:%M:%S")
                    else {
                        continue;
                    };

                    // `earliest()` avoids the panic `unwrap()` caused when the local
                    // time is ambiguous or does not exist (DST transitions).
                    let released = Local
                        .from_local_datetime(&date)
                        .earliest()
                        .is_some_and(|release| release < now);
                    if !released {
                        continue;
                    }

                    // The link and guid must come from the `link` column; the
                    // `title` column was read here by mistake.
                    let link: String = row.get("link");
                    let guid = rss::GuidBuilder::default()
                        .value(link.clone())
                        .permalink(true)
                        .build();

                    items.push(
                        rss::ItemBuilder::default()
                            .title(Some(row.get("title")))
                            .link(Some(link))
                            .guid(Some(guid))
                            // NOTE(review): the publication date is the generation time,
                            // so it changes on every run - confirm this is intended.
                            .pub_date(Some(now.to_rfc2822()))
                            .description(Some(row.get("description")))
                            .build(),
                    );
                }
            }
            Err(e) => eprintln!("[x] LWN select error: {}", e),
        }

        let channel = rss::ChannelBuilder::default()
            .title("[$] lwn.net")
            .link("https://dawl.fr/lwn.net/rss.xml")
            .description("RSS flux of lwn.net paid articles that are freely released.")
            .items(items)
            .build();

        path.push("lwn.xml");
        source::save_xml(&channel.to_string(), &path)
    }
}

View file

@ -1,198 +0,0 @@
use chrono::{NaiveDate, NaiveDateTime, TimeZone, prelude::Local};
use clap::Parser;
use regex::Regex;
use reqwest::get;
use scraper::{Html, Selector};
use std::env;
use std::error::Error;
use std::fs::File;
use std::io::BufReader;
use std::io::Write;
use tokio::runtime::Runtime;
use tokio_postgres;
// Command-line arguments of the program.
// Plain `//` comments are used on purpose: clap's derive turns `///` doc
// comments into help text, which would change the program's output.
#[derive(Parser)]
struct Cli {
// Location of the RSS XML file: it is opened to load the existing feed and
// then overwritten with the updated one.
path: std::path::PathBuf,
}
/// Entry point: records new `[$]` articles in the `articles` table, then
/// rewrites the RSS file at `args.path` with every article already released.
///
/// Connection settings are read from `POSTGRES_HOST`, `POSTGRES_USER` and
/// `POSTGRES_PASSWORD`, falling back to `localhost` / `dev` / `dev`.
///
/// # Panics
/// Panics if the tokio runtime cannot be created.
fn main() {
    let rt = Runtime::new().unwrap();
    let args = Cli::parse();

    let host = env::var("POSTGRES_HOST").unwrap_or_else(|_| String::from("localhost"));
    let user = env::var("POSTGRES_USER").unwrap_or_else(|_| String::from("dev"));
    let password = env::var("POSTGRES_PASSWORD").unwrap_or_else(|_| String::from("dev"));

    // The database name is taken from POSTGRES_USER, exactly as before.
    let db_connection_string =
        format!("host={host} dbname={user} user={user} password={password}");
    // Never write the password to the logs.
    println!("Connection string: host={host} dbname={user} user={user} password=***");

    rt.block_on(async {
        // Connect to the database.
        let (client, connection) =
            match tokio_postgres::connect(&db_connection_string, tokio_postgres::NoTls).await {
                Ok(pair) => pair,
                Err(e) => {
                    eprintln!("[x] Failed to connect to the db: {}", e);
                    return;
                }
            };

        // The connection object drives the socket I/O and must be polled.
        tokio::spawn(async move {
            if let Err(e) = connection.await {
                eprintln!("[x] Connection db error: {}", e);
            }
        });

        if let Err(e) = client
            .query(
                "CREATE TABLE IF NOT EXISTS articles (
                    link TEXT,
                    title TEXT,
                    description TEXT,
                    pub_date TEXT,
                    release_date TEXT
                )",
                &[],
            )
            .await
        {
            eprintln!("[x] Table creation error: {}", e);
        }

        // Get new [$] articles
        if let Ok(items) = fetch_paid_article_urls().await {
            for item in items {
                let Some(link) = item.link() else { continue };

                // Only articles that are not stored yet are processed.
                let lookup = client
                    .query_opt(
                        "SELECT release_date FROM articles WHERE link = $1",
                        &[&link],
                    )
                    .await;
                if !matches!(lookup, Ok(None)) {
                    continue;
                }

                let Ok(Some(date)) = fetch_release_date(link).await else {
                    continue;
                };
                let (Some(title), Some(description), Some(pub_date)) =
                    (item.title(), item.description(), item.pub_date())
                else {
                    continue;
                };

                println!("Adding new article to db: {}", link);
                if let Err(e) = client
                    .query(
                        "INSERT INTO articles (
                            link,
                            title,
                            description,
                            pub_date,
                            release_date
                        ) VALUES (
                            $1, $2, $3, $4, $5)",
                        &[
                            &link,
                            &title,
                            &description,
                            &pub_date,
                            &date.to_string(),
                        ],
                    )
                    .await
                {
                    eprintln!("[x] Error insert: {}", e);
                }
            }
        }

        // TODO: How to manage the RSS xml file
        let new_channel = || {
            rss::ChannelBuilder::default()
                .title("[$] lwn.net")
                .link("https://dawl.fr/lwn.net/rss.xml")
                .description("RSS flux of lwn.net paid articles that are freely released.")
                .items(vec![])
                .build()
        };
        let mut channel = match File::open(&args.path) {
            // An unreadable existing feed is replaced instead of aborting the run.
            Ok(file) => match rss::Channel::read_from(BufReader::new(file)) {
                Ok(existing) => existing,
                Err(e) => {
                    eprintln!("[x] Existing feed is unreadable, starting a new one: {}", e);
                    new_channel()
                }
            },
            Err(_) => new_channel(),
        };

        let mut items = channel.clone().into_items();
        // Guids already present in the feed: every run used to append all
        // released articles again, so the file accumulated duplicates.
        let mut known_guids: std::collections::HashSet<String> = items
            .iter()
            .filter_map(|item| item.guid().map(|guid| guid.value().to_owned()))
            .collect();
        let now = Local::now();

        if let Ok(saved_articles) = client.query("SELECT * FROM articles", &[]).await {
            for row in saved_articles {
                let date: &str = row.get("release_date");
                let Ok(date) = NaiveDateTime::parse_from_str(date, "%Y-%m-%d %H:%M:%S") else {
                    continue;
                };

                // `earliest()` avoids the panic `unwrap()` caused when the local
                // time is ambiguous or does not exist (DST transitions).
                let released = Local
                    .from_local_datetime(&date)
                    .earliest()
                    .is_some_and(|release| release < now);
                if !released {
                    continue;
                }

                // The link and guid must come from the `link` column; the
                // `title` column was read here by mistake.
                let link: String = row.get("link");
                if !known_guids.insert(link.clone()) {
                    continue;
                }

                let guid = rss::GuidBuilder::default()
                    .value(link.clone())
                    .permalink(true)
                    .build();
                items.push(
                    rss::ItemBuilder::default()
                        .title(Some(row.get("title")))
                        .link(Some(link))
                        .guid(Some(guid))
                        .pub_date(Some(now.to_rfc2822()))
                        .description(Some(row.get("description")))
                        .build(),
                );
            }
        };

        channel.set_items(items);
        if let Err(e) = save_xml(&channel.to_string(), args.path) {
            eprintln!("[x] Failed to save xml: {}", e);
        }
    });
}
/// Creates (or truncates) the file at `path` and writes `rss_string` into it.
///
/// # Errors
/// Returns the I/O error raised while creating or writing the file.
fn save_xml(rss_string: &str, path: std::path::PathBuf) -> std::io::Result<()> {
    let payload = rss_string.as_bytes();
    File::create(path).and_then(|mut out| out.write_all(payload))
}
/// Downloads the article at `url` and extracts the date on which it becomes
/// freely available.
///
/// Only the last `<p>` of the first `div.ArticleText` is inspected. Returns
/// `Ok(None)` when that element or the expected sentence is missing; the
/// resulting timestamp is midnight of the announced day.
async fn fetch_release_date(url: &str) -> Result<Option<NaiveDateTime>, Box<dyn Error>> {
    let response = get(url).await?.text().await?;
    let document = Html::parse_document(&response);

    let article_selector = Selector::parse("div.ArticleText")?;
    let Some(article_text) = document.select(&article_selector).next() else {
        return Ok(None);
    };

    let paragraph_selector = Selector::parse("p")?;
    let Some(last_paragraph) = article_text.select(&paragraph_selector).last() else {
        return Ok(None);
    };

    let re = Regex::new(
        r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
    )?;
    let paragraph_html = last_paragraph.inner_html();
    let Some(date) = re.captures(&paragraph_html).and_then(|cap| cap.get(1)) else {
        return Ok(None);
    };

    let day = NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?;
    Ok(Some(day.and_hms_opt(0, 0, 0).unwrap()))
}
/// Downloads the lwn.net headlines feed and returns the items whose title
/// starts with `[$]`.
async fn fetch_paid_article_urls() -> Result<Vec<rss::Item>, Box<dyn Error>> {
    let body = get("https://lwn.net/headlines/rss").await?.bytes().await?;
    let channel = rss::Channel::read_from(&body[..])?;

    let mut paid: Vec<rss::Item> = Vec::new();
    for item in channel.items() {
        if item.title().unwrap_or("").starts_with("[$]") {
            paid.push(item.clone());
        }
    }
    Ok(paid)
}

22
src/source.rs Normal file
View file

@ -0,0 +1,22 @@
use crate::error::{Error, RssifyResult};
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
/// A feed source that is refreshed through a PostgreSQL client and then
/// written out as an XML file.
pub trait Source {
/// Refreshes the data of this source using `client`.
/// The implementations in this crate download upstream data and store new
/// entries in their own table.
fn fetch(client: &tokio_postgres::Client) -> impl Future<Output = RssifyResult<()>>;
/// Builds the feed from the data reachable through `client` and writes it
/// under `path`.
/// The implementations in this crate append their own file name to `path`.
fn publish(
client: &tokio_postgres::Client,
path: PathBuf,
) -> impl Future<Output = RssifyResult<()>>;
}
pub fn save_xml(rss_string: &str, path: &std::path::PathBuf) -> RssifyResult<()> {
if let Ok(mut file) = File::create(path) {
if file.write_all(rss_string.as_bytes()).is_ok() {
return Ok(());
}
}
Err(Error::save_xml_error(path.to_str().unwrap()))
}