Add fetch paid articles

This commit is contained in:
dolphinau 2025-07-02 17:50:54 +02:00
parent 4a20c539f6
commit 06dafb24fc
No known key found for this signature in database
2 changed files with 35 additions and 16 deletions

View file

@ -1,41 +1,59 @@
use std::error::Error;
use chrono::NaiveDate;
use regex::Regex;
use reqwest::get;
use rss::Channel;
use scraper::{Html, Selector};
use tokio::runtime::Runtime;
use tokio::{runtime::Runtime, sync::mpsc::unbounded_channel};
/// Entry point: lists the paid articles from the LWN headlines feed and prints, for each
/// one that announces it, the date on which the article becomes freely available.
///
/// The work is best-effort: a failure to fetch the feed or a single article is reported on
/// stderr and does not abort the remaining articles.
///
/// # Panics
/// Panics if the Tokio runtime cannot be created.
fn main() {
    // Without a runtime nothing else can run, so abort with a clear message.
    let rt = Runtime::new().expect("failed to create the Tokio runtime");

    rt.block_on(async {
        let articles = match fetch_paid_article_urls().await {
            Ok(articles) => articles,
            Err(err) => {
                eprintln!("Failed to fetch paid article URLs: {}", err);
                return;
            }
        };

        for article in articles {
            match fetch_release_date(&article).await {
                Ok(Some(date)) => {
                    // TODO
                    println!("Snooze {} to {}", article, date);
                }
                // The page carries no release notice: nothing to report for this article.
                Ok(None) => {}
                Err(err) => {
                    eprintln!("Failed to fetch release date for {}: {}", article, err);
                }
            }
        }
    });
}
async fn fetch_release_date(url: &str) -> Option<NaiveDate> {
let response = get(url).await.unwrap();
let response_text = response.text().await.unwrap();
async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Error>> {
let response = get(url).await?.text().await?;
if let Some(article_text) = Html::parse_document(&response_text)
.select(&Selector::parse("div.ArticleText").unwrap())
if let Some(article_text) = Html::parse_document(&response)
.select(&Selector::parse("div.ArticleText")?)
.next()
{
if let Some(yes) = article_text.select(&Selector::parse("p").unwrap()).last() {
if let Some(yes) = article_text.select(&Selector::parse("p")?).last() {
let re = Regex::new(
r#"(?m)\(Alternatively, this item will become freely\n\s* available on ([A-Z][a-z]+ [0-9]{2}, [0-9]{4})\)"#,
)
.unwrap();
)?;
if let Some(cap) = re.captures(&yes.inner_html()) {
if let Some(date) = cap.get(1) {
return NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y").ok();
let date = NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?;
return Ok(Some(date));
}
}
}
}
None
Ok(None)
}
async fn fetch_paid_articles() -> Option<Vec<String>> {
let response = get("https://lwn.net/headlines/rss").await.unwrap();
let response_text = response.text().await.unwrap();
async fn fetch_paid_article_urls() -> Result<Vec<String>, Box<dyn Error>> {
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
let channel = Channel::read_from(&response[..])?;
None
Ok(channel
.items()
.iter()
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
.filter_map(|i| i.link())
.map(|s| s.to_string())
.collect::<Vec<String>>())
}