Add db support to store articles
This commit is contained in:
parent
06dafb24fc
commit
9c45142083
6 changed files with 75 additions and 23 deletions
0
.envrc
Normal file → Executable file
0
.envrc
Normal file → Executable file
0
.gitignore
vendored
Normal file → Executable file
0
.gitignore
vendored
Normal file → Executable file
6
Cargo.toml
Normal file → Executable file
6
Cargo.toml
Normal file → Executable file
|
|
@ -4,12 +4,12 @@ version = "0.1.0"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
tokio = { version = "1.46.0", default-features = false, features = [
|
tokio = { version = "1.46.0", features = ["full"] }
|
||||||
"rt-multi-thread",
|
|
||||||
] }
|
|
||||||
futures = "0.3.31"
|
futures = "0.3.31"
|
||||||
reqwest = "0.12.22"
|
reqwest = "0.12.22"
|
||||||
scraper = "0.23.1"
|
scraper = "0.23.1"
|
||||||
regex = "1.11.1"
|
regex = "1.11.1"
|
||||||
chrono = "0.4.41"
|
chrono = "0.4.41"
|
||||||
rss = "2.0.12"
|
rss = "2.0.12"
|
||||||
|
mini-redis = "0.4.1"
|
||||||
|
tokio-postgres = "0.7.13"
|
||||||
|
|
|
||||||
6
flake.nix
Normal file → Executable file
6
flake.nix
Normal file → Executable file
|
|
@ -1,6 +1,5 @@
|
||||||
{
|
{
|
||||||
description = "lwn-sub-snoozer";
|
description = "lwn-sub-snoozer";
|
||||||
|
|
||||||
inputs = {
|
inputs = {
|
||||||
flake-utils.url = "github:numtide/flake-utils";
|
flake-utils.url = "github:numtide/flake-utils";
|
||||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
|
nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05";
|
||||||
|
|
@ -25,15 +24,12 @@
|
||||||
# $ nix develop
|
# $ nix develop
|
||||||
devShells.default = pkgs.mkShell {
|
devShells.default = pkgs.mkShell {
|
||||||
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [pkgs.openssl];
|
LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath [pkgs.openssl];
|
||||||
|
|
||||||
packages = [
|
packages = [
|
||||||
pkgs.gnumake
|
|
||||||
pkgs.pkg-config
|
pkgs.pkg-config
|
||||||
pkgs.openssl
|
pkgs.openssl
|
||||||
|
|
||||||
# Nix
|
|
||||||
pkgs.nixpkgs-fmt
|
pkgs.nixpkgs-fmt
|
||||||
pkgs.nil
|
pkgs.nil
|
||||||
|
pkgs.postgresql
|
||||||
|
|
||||||
# Rust
|
# Rust
|
||||||
fenix.packages.${system}.default.toolchain
|
fenix.packages.${system}.default.toolchain
|
||||||
|
|
|
||||||
7
justfile
Normal file
7
justfile
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
clean:
|
||||||
|
rm -rf target
|
||||||
|
podman network rm lwn-sub-snoozer || true
|
||||||
|
|
||||||
|
db:
|
||||||
|
podman network create lwn-sub-snoozer || true
|
||||||
|
podman run --name postgres --rm -p 5432:5432 --network=lwn-sub-snoozer -e POSTGRES_DB=dev -e POSTGRES_USER=root -e POSTGRES_PASSWORD=root docker.io/postgres:alpine
|
||||||
79
src/main.rs
Normal file → Executable file
79
src/main.rs
Normal file → Executable file
|
|
@ -3,21 +3,72 @@ use std::error::Error;
|
||||||
use chrono::NaiveDate;
|
use chrono::NaiveDate;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::get;
|
use reqwest::get;
|
||||||
use rss::Channel;
|
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
use tokio::{runtime::Runtime, sync::mpsc::unbounded_channel};
|
use tokio::{
|
||||||
|
runtime::Runtime,
|
||||||
|
sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel},
|
||||||
|
};
|
||||||
|
use tokio_postgres;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let rt = Runtime::new().unwrap();
|
let rt = Runtime::new().unwrap();
|
||||||
|
|
||||||
rt.block_on(async {
|
rt.block_on(async {
|
||||||
if let Ok(articles) = fetch_paid_article_urls().await {
|
// Connect to the database.
|
||||||
for article in articles {
|
if let Ok((client, connection)) = tokio_postgres::connect(
|
||||||
if let Ok(Some(date)) = fetch_release_date(&article).await {
|
"host=localhost dbname=dev user=root password=root",
|
||||||
// TODO
|
tokio_postgres::NoTls,
|
||||||
println!("Snooze {} to {}", article, date);
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(e) = connection.await {
|
||||||
|
eprintln!("connection error: {}", e);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Get new [$] articles
|
||||||
|
if let Ok(items) = fetch_paid_article_urls().await {
|
||||||
|
for item in items {
|
||||||
|
if let Some(link) = item.link() {
|
||||||
|
match client
|
||||||
|
.query_opt("SELECT date FROM articles WHERE id = $1", &[&link])
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(None) => {
|
||||||
|
if let Ok(Some(date)) = fetch_release_date(&link).await {
|
||||||
|
println!("Adding new article to db: {}", link);
|
||||||
|
if let Err(e) = client
|
||||||
|
.query(
|
||||||
|
"INSERT INTO articles (id, date) VALUES ($1, $2)",
|
||||||
|
&[&link, &date.to_string()],
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
eprintln!("Error insert: {}", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: Check for new free articles
|
||||||
|
// client
|
||||||
|
// .query("SELECT * FROM articles")
|
||||||
|
// .await
|
||||||
|
// .unwrap()
|
||||||
|
// .iter()
|
||||||
|
// .map(|row| {
|
||||||
|
// let id = row.get("id");
|
||||||
|
// let date = row.get("date");
|
||||||
|
//
|
||||||
|
// if date < today {
|
||||||
|
// article.publish
|
||||||
|
// }
|
||||||
|
// })
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -31,12 +82,11 @@ async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Erro
|
||||||
{
|
{
|
||||||
if let Some(yes) = article_text.select(&Selector::parse("p")?).last() {
|
if let Some(yes) = article_text.select(&Selector::parse("p")?).last() {
|
||||||
let re = Regex::new(
|
let re = Regex::new(
|
||||||
r#"(?m)\(Alternatively, this item will become freely\n\s* available on ([A-Z][a-z]+ [0-9]{2}, [0-9]{4})\)"#,
|
r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
|
||||||
)?;
|
)?;
|
||||||
if let Some(cap) = re.captures(&yes.inner_html()) {
|
if let Some(cap) = re.captures(&yes.inner_html()) {
|
||||||
if let Some(date) = cap.get(1) {
|
if let Some(date) = cap.get(1) {
|
||||||
let date = NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?;
|
return Ok(Some(NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?));
|
||||||
return Ok(Some(date));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -45,15 +95,14 @@ async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Erro
|
||||||
Ok(None)
|
Ok(None)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch_paid_article_urls() -> Result<Vec<String>, Box<dyn Error>> {
|
async fn fetch_paid_article_urls() -> Result<Vec<rss::Item>, Box<dyn Error>> {
|
||||||
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
|
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
|
||||||
let channel = Channel::read_from(&response[..])?;
|
let channel = rss::Channel::read_from(&response[..])?;
|
||||||
|
|
||||||
Ok(channel
|
Ok(channel
|
||||||
.items()
|
.items()
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
|
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
|
||||||
.filter_map(|i| i.link())
|
.map(|i| i.clone())
|
||||||
.map(|s| s.to_string())
|
.collect::<Vec<rss::Item>>())
|
||||||
.collect::<Vec<String>>())
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue