Save items to PostgreSQL and check for new articles

This commit is contained in:
dolphinau 2025-07-29 13:16:36 +02:00
parent 9c45142083
commit 8bfa5ff00c
No known key found for this signature in database

View file

@ -1,9 +1,11 @@
use std::error::Error; use std::error::Error;
use chrono::NaiveDate; use chrono::{NaiveDate, NaiveDateTime, TimeZone, prelude::Local};
use regex::Regex; use regex::Regex;
use reqwest::get; use reqwest::get;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use std::fs::File;
use std::io::BufReader;
use tokio::{ use tokio::{
runtime::Runtime, runtime::Runtime,
sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel}, sync::mpsc::{UnboundedReceiver, UnboundedSender, unbounded_channel},
@ -27,21 +29,57 @@ fn main() {
} }
}); });
if let Err(e) = client
.query(
"CREATE TABLE IF NOT EXISTS articles (
link TEXT,
title TEXT,
description TEXT,
pub_date TEXT,
release_date TEXT
)",
&[],
)
.await
{
eprintln!("table creation error: {}", e);
}
// Get new [$] articles // Get new [$] articles
if let Ok(items) = fetch_paid_article_urls().await { if let Ok(items) = fetch_paid_article_urls().await {
for item in items { for item in items {
if let Some(link) = item.link() { if let Some(link) = item.link() {
match client match client
.query_opt("SELECT date FROM articles WHERE id = $1", &[&link]) .query_opt(
"SELECT release_date FROM articles WHERE link = $1",
&[&link],
)
.await .await
{ {
Ok(None) => { Ok(None) => {
if let Ok(Some(date)) = fetch_release_date(&link).await { if let Ok(Some(date)) = fetch_release_date(&link).await {
if let (Some(title), Some(description), Some(pub_date)) =
(item.title(), item.description(), item.pub_date())
{
println!("Adding new article to db: {}", link); println!("Adding new article to db: {}", link);
if let Err(e) = client if let Err(e) = client
.query( .query(
"INSERT INTO articles (id, date) VALUES ($1, $2)", "INSERT INTO articles (
&[&link, &date.to_string()], link,
title,
description,
pub_date,
release_date
) VALUES (
$1, $2, $3, $4, $5)",
&[
&link,
&title,
&description,
&pub_date,
&date.to_string(),
],
) )
.await .await
{ {
@ -49,31 +87,32 @@ fn main() {
} }
} }
} }
}
_ => (), _ => (),
} }
}; };
} }
} }
// TODO: How to manage the RSS xml file
// TODO: Check for new free articles // TODO: Check for new free articles
// client if let Ok(saved_articles) = client.query("SELECT * FROM articles", &[]).await {
// .query("SELECT * FROM articles") saved_articles.iter().for_each(|row| {
// .await let date: &str = row.get("release_date");
// .unwrap() if let Ok(date) = NaiveDateTime::parse_from_str(date, "%Y-%m-%d") {
// .iter() println!("date: {}", date);
// .map(|row| { if Local.from_local_datetime(&date).unwrap() < Local::now() {
// let id = row.get("id"); // TODO: item.publish
// let date = row.get("date"); }
// }
// if date < today { });
// article.publish }
// }
// })
} }
}); });
} }
async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Error>> { async fn fetch_release_date(url: &str) -> Result<Option<NaiveDateTime>, Box<dyn Error>> {
let response = get(url).await?.text().await?; let response = get(url).await?.text().await?;
if let Some(article_text) = Html::parse_document(&response) if let Some(article_text) = Html::parse_document(&response)
@ -86,7 +125,11 @@ async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Erro
)?; )?;
if let Some(cap) = re.captures(&yes.inner_html()) { if let Some(cap) = re.captures(&yes.inner_html()) {
if let Some(date) = cap.get(1) { if let Some(date) = cap.get(1) {
return Ok(Some(NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?)); return Ok(Some(
NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?
.and_hms_opt(0, 0, 0)
.unwrap(),
));
} }
} }
} }