diff --git a/Cargo.toml b/Cargo.toml
index 96cd389..19d52b5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,3 +12,4 @@ reqwest = "0.12.22"
 scraper = "0.23.1"
 regex = "1.11.1"
 chrono = "0.4.41"
+rss = "2.0.12"
diff --git a/src/main.rs b/src/main.rs
index 147f2ad..62cc96f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,41 +1,67 @@
+use std::error::Error;
+
 use chrono::NaiveDate;
 use regex::Regex;
 use reqwest::get;
+use rss::Channel;
 use scraper::{Html, Selector};
 use tokio::runtime::Runtime;
 
+/// Fetches the LWN headlines feed and, for every subscriber-only article,
+/// prints the date on which it becomes freely available.
 fn main() {
     let rt = Runtime::new().unwrap();
-    rt.block_on(fetch_release_date("https://lwn.net/Articles/1025629/"));
+
+    rt.block_on(async {
+        if let Ok(articles) = fetch_paid_article_urls().await {
+            for article in articles {
+                if let Ok(Some(date)) = fetch_release_date(&article).await {
+                    // TODO
+                    println!("Snooze {} to {}", article, date);
+                }
+            }
+        }
+    });
 }
 
-async fn fetch_release_date(url: &str) -> Option<NaiveDate> {
-    let response = get(url).await.unwrap();
-    let response_text = response.text().await.unwrap();
+/// Scrapes an LWN article page for the "(Alternatively, this item will
+/// become freely available on ...)" note in the last paragraph of the
+/// article text and returns that date, or `Ok(None)` when the note is
+/// absent. HTTP, selector, regex, and date-parsing failures propagate.
+async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Error>> {
+    let response = get(url).await?.text().await?;
 
-    if let Some(article_text) = Html::parse_document(&response_text)
-        .select(&Selector::parse("div.ArticleText").unwrap())
+    if let Some(article_text) = Html::parse_document(&response)
+        .select(&Selector::parse("div.ArticleText")?)
         .next()
     {
-        if let Some(yes) = article_text.select(&Selector::parse("p").unwrap()).last() {
+        if let Some(yes) = article_text.select(&Selector::parse("p")?).last() {
             let re = Regex::new(
                 r#"(?m)\(Alternatively, this item will become freely\n\s* available on ([A-Z][a-z]+ [0-9]{2}, [0-9]{4})\)"#,
-            )
-            .unwrap();
+            )?;
             if let Some(cap) = re.captures(&yes.inner_html()) {
                 if let Some(date) = cap.get(1) {
-                    return NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y").ok();
+                    let date = NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?;
+                    return Ok(Some(date));
                 }
             }
         }
     }
 
-    None
+    Ok(None)
 }
 
-async fn fetch_paid_articles() -> Option<Vec<String>> {
-    let response = get("https://lwn.net/headlines/rss").await.unwrap();
-    let response_text = response.text().await.unwrap();
+/// Downloads the LWN headlines RSS feed and returns the link of every
+/// subscriber-only item (titles prefixed with "[$]").
+async fn fetch_paid_article_urls() -> Result<Vec<String>, Box<dyn Error>> {
+    let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
+    let channel = Channel::read_from(&response[..])?;
 
-    None
+    Ok(channel
+        .items()
+        .iter()
+        .filter(|i| i.title().unwrap_or("").starts_with("[$]"))
+        .filter_map(|i| i.link())
+        .map(|s| s.to_string())
+        .collect::<Vec<_>>())
 }