Add db support to store articles
This commit is contained in:
parent
06dafb24fc
commit
9c45142083
6 changed files with 75 additions and 23 deletions
0
.envrc
Normal file → Executable file
0
.envrc
Normal file → Executable file
0
.gitignore
vendored
Normal file → Executable file
0
.gitignore
vendored
Normal file → Executable file
6
Cargo.toml
Normal file → Executable file
6
Cargo.toml
Normal file → Executable file
|
|
@ -4,12 +4,12 @@ version = "0.1.0"
|
|||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
tokio = { version = "1.46.0", default-features = false, features = [
|
||||
"rt-multi-thread",
|
||||
] }
|
||||
tokio = { version = "1.46.0", features = ["full"] }
|
||||
futures = "0.3.31"
|
||||
reqwest = "0.12.22"
|
||||
scraper = "0.23.1"
|
||||
regex = "1.11.1"
|
||||
chrono = "0.4.41"
|
||||
rss = "2.0.12"
|
||||
mini-redis = "0.4.1"
|
||||
tokio-postgres = "0.7.13"
|
||||
|
|
|
|||
6
flake.nix
Normal file → Executable file
6
flake.nix
Normal file → Executable file
|
|
@ -1,6 +1,5 @@
|
|||
{
|
||||
description = "lwn-sub-snoozer";
|
||||
|
||||
inputs = {
|
||||
flake-utils.url = "github:numtide/flake-utils";
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
|
||||
|
|
@ -25,15 +24,12 @@
|
|||
# $ nix develop
|
||||
devShells.default = pkgs.mkShell {
|
||||
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [pkgs.openssl];
|
||||
|
||||
packages = [
|
||||
pkgs.gnumake
|
||||
pkgs.pkg-config
|
||||
pkgs.openssl
|
||||
|
||||
# Nix
|
||||
pkgs.nixpkgs-fmt
|
||||
pkgs.nil
|
||||
pkgs.postgresql
|
||||
|
||||
# Rust
|
||||
fenix.packages.${system}.default.toolchain
|
||||
|
|
|
|||
7
justfile
Normal file
7
justfile
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
clean:
|
||||
rm -rf target
|
||||
podman network rm lwn-sub-snoozer || true
|
||||
|
||||
db:
|
||||
podman network create lwn-sub-snoozer || true
|
||||
podman run --name postgres --rm -p 5432:5432 --network=lwn-sub-snoozer -e POSTGRES_DB=dev -e POSTGRES_USER=root -e POSTGRES_PASSWORD=root docker.io/postgres:alpine
|
||||
79
src/main.rs
Normal file → Executable file
79
src/main.rs
Normal file → Executable file
|
|
@ -3,22 +3,73 @@ use std::error::Error;
|
|||
use chrono::NaiveDate;
|
||||
use regex::Regex;
|
||||
use reqwest::get;
|
||||
use rss::Channel;
|
||||
use scraper::{Html, Selector};
|
||||
use tokio::{runtime::Runtime, sync::mpsc::unbounded_channel};
|
||||
use tokio::{
|
||||
runtime::Runtime,
|
||||
sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel},
|
||||
};
|
||||
use tokio_postgres;
|
||||
|
||||
fn main() {
|
||||
let rt = Runtime::new().unwrap();
|
||||
|
||||
rt.block_on(async {
|
||||
if let Ok(articles) = fetch_paid_article_urls().await {
|
||||
for article in articles {
|
||||
if let Ok(Some(date)) = fetch_release_date(&article).await {
|
||||
// TODO
|
||||
println!("Snooze {} to {}", article, date);
|
||||
// Connect to the database.
|
||||
if let Ok((client, connection)) = tokio_postgres::connect(
|
||||
"host=localhost dbname=dev user=root password=root",
|
||||
tokio_postgres::NoTls,
|
||||
)
|
||||
.await
|
||||
{
|
||||
tokio::spawn(async move {
|
||||
if let Err(e) = connection.await {
|
||||
eprintln!("connection error: {}", e);
|
||||
}
|
||||
});
|
||||
|
||||
// Get new [$] articles
|
||||
if let Ok(items) = fetch_paid_article_urls().await {
|
||||
for item in items {
|
||||
if let Some(link) = item.link() {
|
||||
match client
|
||||
.query_opt("SELECT date FROM articles WHERE id = $1", &[&link])
|
||||
.await
|
||||
{
|
||||
Ok(None) => {
|
||||
if let Ok(Some(date)) = fetch_release_date(&link).await {
|
||||
println!("Adding new article to db: {}", link);
|
||||
if let Err(e) = client
|
||||
.query(
|
||||
"INSERT INTO articles (id, date) VALUES ($1, $2)",
|
||||
&[&link, &date.to_string()],
|
||||
)
|
||||
.await
|
||||
{
|
||||
eprintln!("Error insert: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Check for new free articles
|
||||
// client
|
||||
// .query("SELECT * FROM articles")
|
||||
// .await
|
||||
// .unwrap()
|
||||
// .iter()
|
||||
// .map(|row| {
|
||||
// let id = row.get("id");
|
||||
// let date = row.get("date");
|
||||
//
|
||||
// if date < today {
|
||||
// article.publish
|
||||
// }
|
||||
// })
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -31,12 +82,11 @@ async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Erro
|
|||
{
|
||||
if let Some(yes) = article_text.select(&Selector::parse("p")?).last() {
|
||||
let re = Regex::new(
|
||||
r#"(?m)\(Alternatively, this item will become freely\n\s* available on ([A-Z][a-z]+ [0-9]{2}, [0-9]{4})\)"#,
|
||||
r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
|
||||
)?;
|
||||
if let Some(cap) = re.captures(&yes.inner_html()) {
|
||||
if let Some(date) = cap.get(1) {
|
||||
let date = NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?;
|
||||
return Ok(Some(date));
|
||||
return Ok(Some(NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -45,15 +95,14 @@ async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Erro
|
|||
Ok(None)
|
||||
}
|
||||
|
||||
async fn fetch_paid_article_urls() -> Result<Vec<String>, Box<dyn Error>> {
|
||||
async fn fetch_paid_article_urls() -> Result<Vec<rss::Item>, Box<dyn Error>> {
|
||||
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
|
||||
let channel = Channel::read_from(&response[..])?;
|
||||
let channel = rss::Channel::read_from(&response[..])?;
|
||||
|
||||
Ok(channel
|
||||
.items()
|
||||
.iter()
|
||||
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
|
||||
.filter_map(|i| i.link())
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<String>>())
|
||||
.map(|i| i.clone())
|
||||
.collect::<Vec<rss::Item>>())
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue