Compare commits

..

10 commits

Author SHA1 Message Date
378210b5eb
Fix typo to publish lwn 2025-09-04 12:22:30 +02:00
a56522695c
Bump 0.1.1 2025-08-27 21:45:44 +02:00
a85f6878ae
Add DATABASE_CONNECTION option 2025-08-27 21:36:23 +02:00
580f80b521
Improve nix setup 2025-08-27 18:11:12 +02:00
6f85efcd26
Add KEV 2025-08-27 16:45:08 +02:00
23b263a50b Add readme 2025-08-06 11:29:14 +02:00
379df61598 Create .env.example 2025-08-06 11:24:20 +02:00
2e9b73a768 Add build and run commands with Dockerfile 2025-08-06 11:24:08 +02:00
c9a1c59eb4 Improve cli args 2025-08-06 11:23:06 +02:00
f6af36e999 Update .gitignore 2025-08-06 11:21:51 +02:00
18 changed files with 3712 additions and 221 deletions

3
.env.example Normal file
View file

@ -0,0 +1,3 @@
POSTGRES_DB=root
POSTGRES_USER=root
POSTGRES_PASSWORD=root

27
.gitignore vendored
View file

@ -1,3 +1,24 @@
/target *.env
flake.lock
.direnv/ target
**/*.rs.bk
.idea
*.iml
/result*
*.log
*~
# cachix tmp file
store-path-pre-build
# Devenv
.devenv*
devenv.local.nix
# direnv
.direnv
# pre-commit
.pre-commit-config.yaml
template/flake.lock

3082
Cargo.lock generated Executable file

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,17 @@
[package] [package]
name = "lwn-sub-snoozer" name = "rssify"
version = "0.1.0" version = "0.1.1"
edition = "2024" edition = "2024"
[lib]
name = "rssify"
path = "src/lib.rs"
[[bin]]
name = "rssify-cli"
path = "src/bin/cli.rs"
# required-features = ["cli"]
[dependencies] [dependencies]
tokio = { version = "1.46.0", features = ["full"] } tokio = { version = "1.46.0", features = ["full"] }
futures = "0.3.31" futures = "0.3.31"
@ -13,3 +22,5 @@ chrono = "0.4.41"
rss = "2.0.12" rss = "2.0.12"
mini-redis = "0.4.1" mini-redis = "0.4.1"
tokio-postgres = "0.7.13" tokio-postgres = "0.7.13"
clap = { version = "4.5.42", features = ["derive"] }
json = "0.12.4"

7
Dockerfile Normal file
View file

@ -0,0 +1,7 @@
# Image that compiles the crate in release mode and runs the CLI.
FROM rust:1.88
WORKDIR /src
# Copy the whole build context into /src.
COPY . .
RUN cargo build --release
# Default command: run the CLI with /rss/ as its positional path argument.
CMD ["./target/release/rssify-cli", "/rss/"]

28
README.md Normal file
View file

@ -0,0 +1,28 @@
# rssify - Transform some websites I like into RSS feeds
- lwn paid articles
- CISA KEV release
## Usage
### Using nix
```nix
nix develop # to get the dev dependencies
nix build
nix run -- /tmp/rss/
```
### Using podman
Use the `justfile` to run commands:
```bash
just init # Will init the database, and build the app image
just run # Will run rssify to update the database and the RSS files
```
## TODO
- [ ] Add volume to the db to store it if it crashes
- [ ] Add tests

16
default.nix Normal file
View file

@ -0,0 +1,16 @@
# Package derivation: builds the crate with rustPlatform.buildRustPackage.
{pkgs ? import <nixpkgs> {}}: let
# Read the [package] table of Cargo.toml so the name and version are declared only once.
manifest = (pkgs.lib.importTOML ./Cargo.toml).package;
in
pkgs.rustPlatform.buildRustPackage {
pname = manifest.name;
version = manifest.version;
# Cargo dependencies are taken from the lock file next to this expression.
cargoLock.lockFile = ./Cargo.lock;
src = pkgs.lib.cleanSource ./.;
# pkg-config is needed at build time, openssl is a build input of the package.
nativeBuildInputs = with pkgs; [
pkg-config
];
buildInputs = with pkgs; [
openssl
];
}

27
flake.lock generated Normal file
View file

@ -0,0 +1,27 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1756217674,
"narHash": "sha256-TH1SfSP523QI7kcPiNtMAEuwZR3Jdz0MCDXPs7TS8uo=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "4e7667a90c167f7a81d906e5a75cba4ad8bee620",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-25.05",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

View file

@ -1,39 +1,22 @@
{ {
description = "lwn-sub-snoozer"; description = "lwn-sub-snoozer";
inputs = { inputs = {
flake-utils.url = "github:numtide/flake-utils";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
fenix = {
url = "github:nix-community/fenix/monthly";
inputs.nixpkgs.follows = "nixpkgs";
};
}; };
outputs = { outputs = {
self, self,
nixpkgs, nixpkgs,
flake-utils, }: let
fenix, supportedSystems = ["x86_64-linux"];
}: forAllSystems = nixpkgs.lib.genAttrs supportedSystems;
flake-utils.lib.eachDefaultSystem (system: let pkgsFor = nixpkgs.legacyPackages;
pkgs = nixpkgs.legacyPackages.${system}; in {
in { packages = forAllSystems (system: {
packages = { default = pkgsFor.${system}.callPackage ./. {};
default = self.packages.${system}.myapp;
};
# $ nix develop
devShells.default = pkgs.mkShell {
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [pkgs.openssl];
packages = [
pkgs.pkg-config
pkgs.openssl
pkgs.nixpkgs-fmt
pkgs.nil
pkgs.postgresql
# Rust
fenix.packages.${system}.default.toolchain
];
};
}); });
devShells = forAllSystems (system: {
default = pkgsFor.${system}.callPackage ./shell.nix {};
});
};
} }

View file

@ -1,7 +1,41 @@
set dotenv-load
POSTGRES_HOST := 'rssify_db'
TEMP_DIR := `mktemp -d`
clean: clean:
rm -rf target rm -rf target
podman network rm lwn-sub-snoozer || true podman stop rssify_db || true
podman network rm rssify_network || true
db: init_db:
podman network create lwn-sub-snoozer || true podman network create rssify_network || true
podman run --name postgres --rm -p 5432:5432 --network=lwn-sub-snoozer -e POSTGRES_DB=dev -e POSTGRES_USER=root -e POSTGRES_PASSWORD=root docker.io/postgres:alpine podman run --rm -d --replace \
--name {{POSTGRES_HOST}} \
--network=rssify_network \
-p 5432:5432 \
--env-file .env \
docker.io/postgres:alpine
build:
podman build --tag rssify_app:latest .
init: init_db build
attach:
podman run -it --rm \
--name rssify_app \
--network rssify_network \
-e POSTGRES_HOST={{POSTGRES_HOST}} \
--env-file .env \
--volume {{TEMP_DIR}}:/rss \
rssify_app:latest sh
run:
podman run --rm \
--name rssify_app \
--network rssify_network \
-e POSTGRES_HOST={{POSTGRES_HOST}} \
--env-file .env \
--volume {{TEMP_DIR}}:/rss \
rssify_app:latest

11
shell.nix Normal file
View file

@ -0,0 +1,11 @@
# Development shell: the inputs of the package derivation plus editor and lint tooling.
{pkgs ? import <nixpkgs> {}}:
pkgs.mkShell {
# Reuse the inputs declared by the package derivation in default.nix.
inputsFrom = [(pkgs.callPackage ./default.nix {})];
# Extra tools that are only useful while developing.
buildInputs = with pkgs; [
rust-analyzer
alejandra
clippy
nixd
nil
];
}

61
src/bin/cli.rs Executable file
View file

@ -0,0 +1,61 @@
use clap::Parser;
use rssify::{kev::KEV, lwn::LWN, source::Source};
use std::env;
use tokio::runtime::Runtime;
use tokio_postgres;
/// Fetch the configured sources into PostgreSQL and publish them as RSS feeds.
// clap turns the doc comments below into the `--help` text, so they are
// user-facing strings as much as documentation.
#[derive(Parser)]
struct Cli {
    /// Directory in which the generated feed files are written
    path: std::path::PathBuf,
}
/// Builds the PostgreSQL connection string from the environment.
///
/// `DATABASE_CONNECTION` is used verbatim when it is set. Otherwise the string
/// is assembled from `POSTGRES_HOST`, `POSTGRES_DB`, `POSTGRES_USER` and
/// `POSTGRES_PASSWORD`. A missing `POSTGRES_DB` falls back to the user name,
/// which is what the previous code always used; the other values fall back to
/// `localhost` / `root` / `root`.
fn db_connection_string() -> String {
    if let Ok(connection) = env::var("DATABASE_CONNECTION") {
        return connection;
    }
    let host = env::var("POSTGRES_HOST").unwrap_or_else(|_| String::from("localhost"));
    let user = env::var("POSTGRES_USER").unwrap_or_else(|_| String::from("root"));
    // `dbname` used to be read from POSTGRES_USER, so POSTGRES_DB was ignored.
    let dbname = env::var("POSTGRES_DB").unwrap_or_else(|_| user.clone());
    let password = env::var("POSTGRES_PASSWORD").unwrap_or_else(|_| String::from("root"));
    format!("host={host} dbname={dbname} user={user} password={password}")
}

/// Entry point: connects to PostgreSQL, then fetches and publishes every source.
///
/// Each source is published only when its fetch succeeded. Failures are
/// reported on stderr and do not prevent the next source from running.
fn main() {
    let args = Cli::parse();
    let rt = Runtime::new().expect("failed to start the tokio runtime");
    // The connection string is deliberately not printed: it contains the password.
    let db_connection_string = db_connection_string();

    rt.block_on(async {
        // Connect to the database.
        let (client, connection) =
            match tokio_postgres::connect(&db_connection_string, tokio_postgres::NoTls).await {
                Ok(pair) => pair,
                Err(e) => {
                    eprintln!("[x] Could not connect with db: {}", e);
                    return;
                }
            };

        // The connection object drives the socket; it has to be polled in the
        // background for the client to make progress.
        tokio::spawn(async move {
            if let Err(e) = connection.await {
                eprintln!("[x] Connection db error: {}", e);
            }
        });
        println!("Connected");

        match KEV::fetch(&client).await {
            Ok(()) => {
                println!("KEV fetched successfully");
                match KEV::publish(&client, args.path.clone()).await {
                    Ok(()) => println!("KEV updated successfully"),
                    Err(e) => eprintln!("[x] KEV publish error: {:?}", e),
                }
            }
            Err(e) => eprintln!("[x] KEV fetch error: {:?}", e),
        }

        match LWN::fetch(&client).await {
            Ok(()) => {
                println!("LWN fetched successfully");
                match LWN::publish(&client, args.path.clone()).await {
                    Ok(()) => println!("LWN updated successfully"),
                    Err(e) => eprintln!("[x] LWN publish error: {:?}", e),
                }
            }
            Err(e) => eprintln!("[x] LWN fetch error: {:?}", e),
        }
    });
}

80
src/error.rs Normal file
View file

@ -0,0 +1,80 @@
use std::str::Utf8Error;
/// Category of an error raised by rssify itself, as opposed to an error
/// wrapped from one of the libraries it uses.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum RssifyErrorKind {
/// The KEV catalogue (or a stored KEV entry) could not be interpreted.
InvalidKevCatalogue,
/// The generated XML feed could not be written to disk.
SaveXMLError,
/// A date string could not be parsed into a `NaiveDate`.
InvalidNaiveDate,
/// Catch-all kind; no constructor in this file produces it.
Unknown,
}
/// Error raised by rssify itself: a machine-readable kind plus a
/// human-readable message describing the context.
#[derive(Debug)]
pub struct RssifyError {
/// Kind of error
kind: RssifyErrorKind,
/// Associated message of the context
pub message: String,
}
impl RssifyError {
pub fn new(kind: RssifyErrorKind, message: &str) -> Self {
RssifyError {
kind,
message: String::from(message),
}
}
pub fn kind(&self) -> RssifyErrorKind {
self.kind
}
}
/// Every error the crate can return: its own errors plus the library errors
/// it wraps.
#[derive(Debug)]
pub enum Error {
    /// Rssify error
    RssifyError(RssifyError),
    /// Invalid UTF-8 in a byte sequence
    Utf8Error(Utf8Error),
    /// Failure reported by the HTTP client
    ReqwestError(reqwest::Error),
    /// Failure reported by the RSS reader
    RssError(rss::Error),
}

impl std::fmt::Display for Error {
    /// Prints the rssify message as is, and prefixes wrapped errors with
    /// their origin.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::RssifyError(e) => f.write_str(&e.message),
            Error::Utf8Error(e) => write!(f, "invalid UTF-8: {}", e),
            Error::ReqwestError(e) => write!(f, "HTTP request failed: {}", e),
            Error::RssError(e) => write!(f, "RSS error: {}", e),
        }
    }
}

impl std::error::Error for Error {
    /// Exposes the wrapped library error, if any, so callers can walk the chain.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::RssifyError(_) => None,
            Error::Utf8Error(e) => Some(e),
            Error::ReqwestError(e) => Some(e),
            Error::RssError(e) => Some(e),
        }
    }
}
impl Error {
    /// Creates an rssify error of the given `kind` carrying `message`.
    pub fn new(kind: RssifyErrorKind, message: &str) -> Self {
        Error::RssifyError(RssifyError::new(kind, message))
    }

    /// Error for a KEV catalogue entry that could not be interpreted.
    pub fn invalid_kev_catalogue() -> Self {
        Self::new(
            RssifyErrorKind::InvalidKevCatalogue,
            "[KEV] Invalid KEV catalogue: failed to parse the JSON entry",
        )
    }

    /// Error for a feed that could not be written to `path`.
    pub fn save_xml_error(path: &str) -> Self {
        // The message used to read "Failed to safe XML feed".
        Self::new(
            RssifyErrorKind::SaveXMLError,
            &format!("Failed to save XML feed to {}", path),
        )
    }

    /// Error for a `date` string that is not a valid `NaiveDate`.
    pub fn invalid_naive_date(date: &str) -> Self {
        Self::new(
            RssifyErrorKind::InvalidNaiveDate,
            &format!("Failed to parse NaiveDate {}", date),
        )
    }
}
impl From<reqwest::Error> for Error {
fn from(e: reqwest::Error) -> Self {
Self::ReqwestError(e)
}
}
impl From<rss::Error> for Error {
fn from(e: rss::Error) -> Self {
Self::RssError(e)
}
}
/// Result alias used across the crate: any `T` or the crate-wide [`Error`].
pub type RssifyResult<T> = Result<T, Error>;

127
src/kev.rs Executable file
View file

@ -0,0 +1,127 @@
use reqwest::get;
use rss::{Guid, Item};
use std::path::PathBuf;
use tokio_postgres;
use crate::{
error::{Error, RssifyResult},
source::{self, Source},
};
/// Source for the CISA Known Exploited Vulnerabilities (KEV) catalogue.
pub struct KEV;
/// Location of the KEV catalogue JSON feed; also used as the link of the
/// generated channel and of every item.
const URL: &str =
"https://www.cisa.gov/sites/default/files/feeds/known_exploited_vulnerabilities.json";
impl Source for KEV {
async fn fetch(client: &tokio_postgres::Client) -> RssifyResult<()> {
if let Err(e) = client
.query(
"CREATE TABLE IF NOT EXISTS kev (
title TEXT,
cveID TEXT,
description TEXT,
dateAdded TEXT
)",
&[],
)
.await
{
eprintln!("[x] KEV table creation error: {}", e);
}
let text = get(URL).await?.text().await?;
if let Ok(json) = json::parse(&text) {
if let Ok(last_db_entry) = client
.query(
"SELECT dateAdded, cveID FROM kev ORDER BY dateAdded desc LIMIT 1",
&[],
)
.await
{
let (last_db_cve_id, last_db_date_added): (&str, &str) = match last_db_entry.first()
{
Some(row) => (
row.try_get("cveID")
.map_err(|_| Error::invalid_kev_catalogue())?,
row.try_get("dateAdded")
.map_err(|_| Error::invalid_kev_catalogue())?,
),
_ => ("", ""),
};
println!(
"[DEBUG] Last db entry: {:?} - {:?}",
last_db_cve_id, last_db_date_added
);
let new_entries = json["vulnerabilities"]
.members()
.take_while(|entry| entry["cveID"] != last_db_cve_id);
for entry in new_entries {
if let Err(e) = client
.query(
"INSERT INTO kev (
cveID,
title,
dateAdded,
description
) VALUES (
$1, $2, $3, $4)",
&[
&entry["cveID"].as_str(),
&format!("{} - {}", entry["cveID"], entry["vulnerabilityName"]),
&entry["dateAdded"].as_str(),
&format!(
"Description: {}\nRequired actions: {}\nNotes: {}",
entry["shortDescription"],
entry["requiredAction"],
entry["notes"]
),
],
)
.await
{
eprintln!("[x] Error insert: {}", e);
}
}
}
}
Ok(())
}
async fn publish(client: &tokio_postgres::Client, mut path: PathBuf) -> RssifyResult<()> {
let mut items: Vec<Item> = Vec::new();
if let Ok(entries) = client
.query("SELECT * FROM kev ORDER BY dateAdded desc LIMIT 15", &[])
.await
{
for entry in entries {
let mut guid = Guid::default();
guid.set_value(entry.get::<_, &str>("cveID"));
items.push(
rss::ItemBuilder::default()
.title(Some(entry.get("title")))
.link(Some(String::from(URL)))
.guid(Some(guid))
.pub_date(Some(entry.get("dateAdded")))
.description(Some(entry.get("description")))
.build(),
);
}
};
let channel = rss::ChannelBuilder::default()
.title("CISA KEV")
.link(URL)
.description("CISA Catalog of Known Exploited Vulnerabilities")
.items(items)
.build();
path.push("kev.xml");
source::save_xml(&channel.to_string(), &path)
}
}

4
src/lib.rs Normal file
View file

@ -0,0 +1,4 @@
pub mod error;
pub mod kev;
pub mod lwn;
pub mod source;

158
src/lwn.rs Executable file
View file

@ -0,0 +1,158 @@
use chrono::{NaiveDate, NaiveDateTime, TimeZone, prelude::Local};
use regex::Regex;
use reqwest::get;
use rss::Item;
use scraper::{Html, Selector};
use std::path::PathBuf;
use tokio_postgres;
use crate::error::{Error, RssifyResult};
use crate::source::{self, Source};
/// Source for lwn.net subscriber-only (`[$]`) articles, published once they
/// become freely available.
pub struct LWN;
impl LWN {
async fn fetch_release_date(url: &str) -> RssifyResult<Option<NaiveDateTime>> {
let response = get(url).await?.text().await?;
if let Some(article_text) = Html::parse_document(&response)
.select(&Selector::parse("div.ArticleText").unwrap())
.next()
{
if let Some(yes) = article_text.select(&Selector::parse("p").unwrap()).last() {
let re = Regex::new(
r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
).unwrap();
if let Some(cap) = re.captures(&yes.inner_html()) {
if let Some(date) = cap.get(1) {
return match NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y") {
Ok(date) => Ok(Some(date.and_hms_opt(0, 0, 0).unwrap())),
Err(_) => Err(Error::invalid_naive_date(date.as_str())),
};
}
}
}
}
Ok(None)
}
async fn fetch_paid_article_urls() -> RssifyResult<Vec<rss::Item>> {
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
let channel = rss::Channel::read_from(&response[..])?;
Ok(channel
.items()
.iter()
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
.map(|i| i.clone())
.collect::<Vec<rss::Item>>())
}
}
impl Source for LWN {
    /// Stores every `[$]` article from the headlines feed that is not in the
    /// `lwn` table yet, together with the date it becomes freely available.
    ///
    /// This is best effort: network and database failures are logged on
    /// stderr and the affected article is skipped, so the function returns
    /// `Ok(())` and already stored articles can still be published.
    async fn fetch(client: &tokio_postgres::Client) -> RssifyResult<()> {
        if let Err(e) = client
            .execute(
                "CREATE TABLE IF NOT EXISTS lwn (
                    link TEXT,
                    title TEXT,
                    description TEXT,
                    pub_date TEXT,
                    release_date TEXT
                )",
                &[],
            )
            .await
        {
            eprintln!("[x] LWN table creation error: {}", e);
        }

        // Get new [$] articles
        let items = match LWN::fetch_paid_article_urls().await {
            Ok(items) => items,
            Err(e) => {
                eprintln!("[x] LWN headlines fetch error: {:?}", e);
                return Ok(());
            }
        };

        for item in items {
            let Some(link) = item.link() else { continue };

            match client
                .query_opt("SELECT release_date FROM lwn WHERE link = $1", &[&link])
                .await
            {
                Ok(None) => {}
                // Already stored.
                Ok(Some(_)) => continue,
                Err(e) => {
                    eprintln!("[x] LWN lookup error for {}: {}", link, e);
                    continue;
                }
            }

            let date = match LWN::fetch_release_date(link).await {
                Ok(Some(date)) => date,
                // No release notice on the page.
                Ok(None) => continue,
                Err(e) => {
                    eprintln!("[x] LWN release date error for {}: {:?}", link, e);
                    continue;
                }
            };

            let (Some(title), Some(description), Some(pub_date)) =
                (item.title(), item.description(), item.pub_date())
            else {
                continue;
            };

            println!("Adding new article to db: {}", link);
            let release_date = date.to_string();
            if let Err(e) = client
                .execute(
                    "INSERT INTO lwn (
                        link,
                        title,
                        description,
                        pub_date,
                        release_date
                    ) VALUES (
                        $1, $2, $3, $4, $5)",
                    &[&link, &title, &description, &pub_date, &release_date],
                )
                .await
            {
                eprintln!("[x] Error insert: {}", e);
            }
        }
        Ok(())
    }

    /// Writes every stored article whose release date has passed to
    /// `lwn.xml` inside `path`, most recently released first.
    ///
    /// Each item's `pubDate` is its release date, so regenerating the feed
    /// does not change the dates of items that were already published.
    ///
    /// # Errors
    /// Returns `SaveXMLError` when the file cannot be written.
    async fn publish(client: &tokio_postgres::Client, mut path: PathBuf) -> RssifyResult<()> {
        let mut items: Vec<Item> = Vec::new();
        let now = Local::now();

        match client
            .query("SELECT * FROM lwn ORDER BY release_date DESC", &[])
            .await
        {
            Ok(saved_articles) => {
                for row in &saved_articles {
                    let date: &str = row.get("release_date");
                    let Ok(date) = NaiveDateTime::parse_from_str(date, "%Y-%m-%d %H:%M:%S")
                    else {
                        continue;
                    };
                    // Local midnight can be ambiguous or missing around a DST
                    // change; take the earliest candidate, or read the value
                    // as UTC, instead of panicking.
                    let released_at = Local
                        .from_local_datetime(&date)
                        .earliest()
                        .unwrap_or_else(|| Local.from_utc_datetime(&date));
                    if released_at >= now {
                        continue;
                    }

                    let link: String = row.get("link");
                    let guid = rss::GuidBuilder::default()
                        .value(link.clone())
                        .permalink(true)
                        .build();
                    items.push(
                        rss::ItemBuilder::default()
                            .title(Some(row.get::<_, String>("title")))
                            .link(Some(link))
                            .guid(Some(guid))
                            .pub_date(Some(released_at.to_rfc2822()))
                            .description(Some(row.get::<_, String>("description")))
                            .build(),
                    );
                }
            }
            Err(e) => eprintln!("[x] LWN select error: {}", e),
        }

        let channel = rss::ChannelBuilder::default()
            .title("[$] lwn.net")
            .link("https://dawl.fr/lwn.net/rss.xml")
            .description("RSS flux of lwn.net paid articles that are freely released.")
            .items(items)
            .build();
        path.push("lwn.xml");
        source::save_xml(&channel.to_string(), &path)
    }
}

View file

@ -1,184 +0,0 @@
use chrono::{NaiveDate, NaiveDateTime, TimeZone, prelude::Local};
use regex::Regex;
use reqwest::get;
use scraper::{Html, Selector};
use std::error::Error;
use std::fs::File;
use std::io::BufReader;
use std::io::Write;
use tokio::{
runtime::Runtime,
sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel},
};
use tokio_postgres;
/// Entry point: stores newly announced `[$]` articles in PostgreSQL, then
/// regenerates the feed with every article whose release date has passed.
fn main() {
    let rt = Runtime::new().unwrap();
    rt.block_on(async {
        // Connect to the database.
        let (client, connection) = match tokio_postgres::connect(
            "host=localhost dbname=dev user=root password=root",
            tokio_postgres::NoTls,
        )
        .await
        {
            Ok(pair) => pair,
            Err(e) => {
                eprintln!("connection error: {}", e);
                return;
            }
        };
        // The connection object has to be polled for the client to make progress.
        tokio::spawn(async move {
            if let Err(e) = connection.await {
                eprintln!("connection error: {}", e);
            }
        });

        if let Err(e) = client
            .query(
                "CREATE TABLE IF NOT EXISTS articles (
                    link TEXT,
                    title TEXT,
                    description TEXT,
                    pub_date TEXT,
                    release_date TEXT
                )",
                &[],
            )
            .await
        {
            eprintln!("table creation error: {}", e);
        }

        // Get new [$] articles
        if let Ok(items) = fetch_paid_article_urls().await {
            for item in items {
                let Some(link) = item.link() else { continue };
                let lookup = client
                    .query_opt(
                        "SELECT release_date FROM articles WHERE link = $1",
                        &[&link],
                    )
                    .await;
                // Only articles that are not stored yet are processed.
                if !matches!(lookup, Ok(None)) {
                    continue;
                }
                let Ok(Some(date)) = fetch_release_date(link).await else {
                    continue;
                };
                let (Some(title), Some(description), Some(pub_date)) =
                    (item.title(), item.description(), item.pub_date())
                else {
                    continue;
                };
                println!("Adding new article to db: {}", link);
                let release_date = date.to_string();
                if let Err(e) = client
                    .query(
                        "INSERT INTO articles (
                            link,
                            title,
                            description,
                            pub_date,
                            release_date
                        ) VALUES (
                            $1, $2, $3, $4, $5)",
                        &[&link, &title, &description, &pub_date, &release_date],
                    )
                    .await
                {
                    eprintln!("Error insert: {}", e);
                }
            }
        }

        // TODO: How to manage the RSS xml file
        // Start from the existing feed when it can be read, otherwise from an
        // empty channel; an unreadable file no longer aborts the run.
        let mut channel = File::open("rss.xml")
            .ok()
            .and_then(|file| rss::Channel::read_from(BufReader::new(file)).ok())
            .unwrap_or_else(|| {
                rss::ChannelBuilder::default()
                    .title("[$] lwn.net")
                    .link("https://dawl.fr/lwn.net/rss.xml")
                    .description("RSS flux of lwn.net paid articles that are freely released.")
                    .items(vec![])
                    .build()
            });

        let mut items = channel.clone().into_items();
        if let Ok(saved_articles) = client.query("SELECT * FROM articles", &[]).await {
            let now = Local::now();
            for row in &saved_articles {
                let date: &str = row.get("release_date");
                let Ok(date) = NaiveDateTime::parse_from_str(date, "%Y-%m-%d %H:%M:%S") else {
                    continue;
                };
                // Local midnight may be ambiguous or missing around a DST change.
                let released_at = Local
                    .from_local_datetime(&date)
                    .earliest()
                    .unwrap_or_else(|| Local.from_utc_datetime(&date));
                if released_at >= now {
                    continue;
                }
                // The link and guid used to be read from the `title` column.
                let link: String = row.get("link");
                let guid = rss::GuidBuilder::default()
                    .value(link.clone())
                    .permalink(true)
                    .build();
                items.push(
                    rss::ItemBuilder::default()
                        .title(Some(row.get::<_, String>("title")))
                        .link(Some(link))
                        .guid(Some(guid))
                        .pub_date(Some(now.to_rfc2822()))
                        .description(Some(row.get::<_, String>("description")))
                        .build(),
                );
            }
        }
        channel.set_items(items);

        if let Err(e) = save_xml(&channel.to_string()) {
            eprintln!("failed to save xml: {}", e);
        }
    });
}
/// Writes `rss_string` to `paid_lwn_net_rss.xml` in the current directory,
/// replacing any previous content.
fn save_xml(rss_string: &str) -> std::io::Result<()> {
    File::create("paid_lwn_net_rss.xml")
        .and_then(|mut output| output.write_all(rss_string.as_bytes()))
}
/// Downloads the article at `url` and extracts the date on which it becomes
/// freely available.
///
/// Returns `Ok(None)` when the page has no `div.ArticleText`, when that
/// element has no paragraph, or when its last paragraph does not carry the
/// "freely available on <date>" notice.
async fn fetch_release_date(url: &str) -> Result<Option<NaiveDateTime>, Box<dyn Error>> {
    let body = get(url).await?.text().await?;
    let document = Html::parse_document(&body);

    let article_selector = Selector::parse("div.ArticleText")?;
    let Some(article) = document.select(&article_selector).next() else {
        return Ok(None);
    };

    let paragraph_selector = Selector::parse("p")?;
    let Some(last_paragraph) = article.select(&paragraph_selector).last() else {
        return Ok(None);
    };

    let notice = Regex::new(
        r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
    )?;
    let paragraph_html = last_paragraph.inner_html();
    let Some(found) = notice
        .captures(&paragraph_html)
        .and_then(|groups| groups.get(1))
    else {
        return Ok(None);
    };

    let day = NaiveDate::parse_from_str(found.as_str(), "%B %d, %Y")?;
    Ok(Some(day.and_hms_opt(0, 0, 0).unwrap()))
}
/// Downloads the lwn.net headlines feed and returns the items whose title
/// starts with `[$]`.
async fn fetch_paid_article_urls() -> Result<Vec<rss::Item>, Box<dyn Error>> {
    let bytes = get("https://lwn.net/headlines/rss").await?.bytes().await?;
    let headlines = rss::Channel::read_from(&bytes[..])?;
    let paid: Vec<rss::Item> = headlines
        .items()
        .iter()
        .filter(|item| item.title().unwrap_or("").starts_with("[$]"))
        .cloned()
        .collect();
    Ok(paid)
}

22
src/source.rs Normal file
View file

@ -0,0 +1,22 @@
use crate::error::{Error, RssifyResult};
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
/// A feed source: something that can be fetched into the database and then
/// published from it as an RSS file.
pub trait Source {
/// Retrieves the source's new entries and stores them through `client`.
fn fetch(client: &tokio_postgres::Client) -> impl Future<Output = RssifyResult<()>>;
/// Builds the feed from the entries stored behind `client` and saves it
/// under `path`; the implementations in this crate treat `path` as a
/// directory and append their own file name to it.
fn publish(
client: &tokio_postgres::Client,
path: PathBuf,
) -> impl Future<Output = RssifyResult<()>>;
}
pub fn save_xml(rss_string: &str, path: &std::path::PathBuf) -> RssifyResult<()> {
if let Ok(mut file) = File::create(path) {
if file.write_all(rss_string.as_bytes()).is_ok() {
return Ok(());
}
}
Err(Error::save_xml_error(path.to_str().unwrap()))
}