Add db support to store articles

This commit is contained in:
dolphinau 2025-07-28 18:47:16 +02:00
parent 06dafb24fc
commit 9c45142083
No known key found for this signature in database
6 changed files with 75 additions and 23 deletions

0
.envrc Normal file → Executable file
View file

0
.gitignore vendored Normal file → Executable file
View file

6
Cargo.toml Normal file → Executable file
View file

@ -4,12 +4,12 @@ version = "0.1.0"
edition = "2024"
[dependencies]
tokio = { version = "1.46.0", default-features = false, features = [
"rt-multi-thread",
] }
tokio = { version = "1.46.0", features = ["full"] }
futures = "0.3.31"
reqwest = "0.12.22"
scraper = "0.23.1"
regex = "1.11.1"
chrono = "0.4.41"
rss = "2.0.12"
mini-redis = "0.4.1"
tokio-postgres = "0.7.13"

6
flake.nix Normal file → Executable file
View file

@ -1,6 +1,5 @@
{
description = "lwn-sub-snoozer";
inputs = {
flake-utils.url = "github:numtide/flake-utils";
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
@ -25,15 +24,12 @@
# $ nix develop
devShells.default = pkgs.mkShell {
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [pkgs.openssl];
packages = [
pkgs.gnumake
pkgs.pkg-config
pkgs.openssl
# Nix
pkgs.nixpkgs-fmt
pkgs.nil
pkgs.postgresql
# Rust
fenix.packages.${system}.default.toolchain

7
justfile Normal file
View file

@ -0,0 +1,7 @@
# Remove the `target` build directory and the `lwn-sub-snoozer` podman network.
# A failure to remove the network is ignored (`|| true`).
clean:
rm -rf target
podman network rm lwn-sub-snoozer || true
# Run a PostgreSQL container (docker.io/postgres:alpine) named `postgres` on the
# `lwn-sub-snoozer` network, publishing port 5432, with database `dev` and
# user/password `root`/`root`. The network is created first; a failure to create
# it is ignored. `--rm` removes the container when it exits.
db:
podman network create lwn-sub-snoozer || true
podman run --name postgres --rm -p 5432:5432 --network=lwn-sub-snoozer -e POSTGRES_DB=dev -e POSTGRES_USER=root -e POSTGRES_PASSWORD=root docker.io/postgres:alpine

79
src/main.rs Normal file → Executable file
View file

@ -3,21 +3,72 @@ use std::error::Error;
use chrono::NaiveDate;
use regex::Regex;
use reqwest::get;
use rss::Channel;
use scraper::{Html, Selector};
use tokio::{runtime::Runtime, sync::mpsc::unbounded_channel};
use tokio::{
runtime::Runtime,
sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel},
};
use tokio_postgres;
fn main() {
let rt = Runtime::new().unwrap();
rt.block_on(async {
if let Ok(articles) = fetch_paid_article_urls().await {
for article in articles {
if let Ok(Some(date)) = fetch_release_date(&article).await {
// TODO
println!("Snooze {} to {}", article, date);
// Connect to the database.
if let Ok((client, connection)) = tokio_postgres::connect(
"host=localhost dbname=dev user=root password=root",
tokio_postgres::NoTls,
)
.await
{
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
});
// Get new [$] articles
if let Ok(items) = fetch_paid_article_urls().await {
for item in items {
if let Some(link) = item.link() {
match client
.query_opt("SELECT date FROM articles WHERE id = $1", &[&link])
.await
{
Ok(None) => {
if let Ok(Some(date)) = fetch_release_date(&link).await {
println!("Adding new article to db: {}", link);
if let Err(e) = client
.query(
"INSERT INTO articles (id, date) VALUES ($1, $2)",
&[&link, &date.to_string()],
)
.await
{
eprintln!("Error insert: {}", e);
}
}
}
_ => (),
}
};
}
}
// TODO: Check for new free articles
// client
// .query("SELECT * FROM articles")
// .await
// .unwrap()
// .iter()
// .map(|row| {
// let id = row.get("id");
// let date = row.get("date");
//
// if date < today {
// article.publish
// }
// })
}
});
}
@ -31,12 +82,11 @@ async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Erro
{
if let Some(yes) = article_text.select(&Selector::parse("p")?).last() {
let re = Regex::new(
r#"(?m)\(Alternatively, this item will become freely\n\s* available on ([A-Z][a-z]+ [0-9]{2}, [0-9]{4})\)"#,
r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
)?;
if let Some(cap) = re.captures(&yes.inner_html()) {
if let Some(date) = cap.get(1) {
let date = NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?;
return Ok(Some(date));
return Ok(Some(NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?));
}
}
}
@ -45,15 +95,14 @@ async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Erro
Ok(None)
}
async fn fetch_paid_article_urls() -> Result<Vec<String>, Box<dyn Error>> {
async fn fetch_paid_article_urls() -> Result<Vec<rss::Item>, Box<dyn Error>> {
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
let channel = Channel::read_from(&response[..])?;
let channel = rss::Channel::read_from(&response[..])?;
Ok(channel
.items()
.iter()
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
.filter_map(|i| i.link())
.map(|s| s.to_string())
.collect::<Vec<String>>())
.map(|i| i.clone())
.collect::<Vec<rss::Item>>())
}