Add db support to store articles

This commit is contained in:
dolphinau 2025-07-28 18:47:16 +02:00
parent 06dafb24fc
commit 9c45142083
No known key found for this signature in database
6 changed files with 75 additions and 23 deletions

79
src/main.rs Normal file → Executable file
View file

@ -3,21 +3,72 @@ use std::error::Error;
use chrono::NaiveDate;
use regex::Regex;
use reqwest::get;
use rss::Channel;
use scraper::{Html, Selector};
use tokio::{runtime::Runtime, sync::mpsc::unbounded_channel};
use tokio::{
runtime::Runtime,
sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel},
};
use tokio_postgres;
fn main() {
let rt = Runtime::new().unwrap();
rt.block_on(async {
if let Ok(articles) = fetch_paid_article_urls().await {
for article in articles {
if let Ok(Some(date)) = fetch_release_date(&article).await {
// TODO
println!("Snooze {} to {}", article, date);
// Connect to the database.
if let Ok((client, connection)) = tokio_postgres::connect(
"host=localhost dbname=dev user=root password=root",
tokio_postgres::NoTls,
)
.await
{
tokio::spawn(async move {
if let Err(e) = connection.await {
eprintln!("connection error: {}", e);
}
});
// Get new [$] articles
if let Ok(items) = fetch_paid_article_urls().await {
for item in items {
if let Some(link) = item.link() {
match client
.query_opt("SELECT date FROM articles WHERE id = $1", &[&link])
.await
{
Ok(None) => {
if let Ok(Some(date)) = fetch_release_date(&link).await {
println!("Adding new article to db: {}", link);
if let Err(e) = client
.query(
"INSERT INTO articles (id, date) VALUES ($1, $2)",
&[&link, &date.to_string()],
)
.await
{
eprintln!("Error insert: {}", e);
}
}
}
_ => (),
}
};
}
}
// TODO: Check for new free articles
// client
// .query("SELECT * FROM articles")
// .await
// .unwrap()
// .iter()
// .map(|row| {
// let id = row.get("id");
// let date = row.get("date");
//
// if date < today {
// article.publish
// }
// })
}
});
}
@ -31,12 +82,11 @@ async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Erro
{
if let Some(yes) = article_text.select(&Selector::parse("p")?).last() {
let re = Regex::new(
r#"(?m)\(Alternatively, this item will become freely\n\s* available on ([A-Z][a-z]+ [0-9]{2}, [0-9]{4})\)"#,
r#"(?m)\(Alternatively, this item will become freely\s*available on ([A-Z][a-z]+ [0-9]{1,2}, [0-9]{4})\)"#,
)?;
if let Some(cap) = re.captures(&yes.inner_html()) {
if let Some(date) = cap.get(1) {
let date = NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?;
return Ok(Some(date));
return Ok(Some(NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?));
}
}
}
@ -45,15 +95,14 @@ async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Erro
Ok(None)
}
async fn fetch_paid_article_urls() -> Result<Vec<String>, Box<dyn Error>> {
async fn fetch_paid_article_urls() -> Result<Vec<rss::Item>, Box<dyn Error>> {
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
let channel = Channel::read_from(&response[..])?;
let channel = rss::Channel::read_from(&response[..])?;
Ok(channel
.items()
.iter()
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
.filter_map(|i| i.link())
.map(|s| s.to_string())
.collect::<Vec<String>>())
.map(|i| i.clone())
.collect::<Vec<rss::Item>>())
}