Add fetch paid articles
This commit is contained in:
parent
4a20c539f6
commit
06dafb24fc
2 changed files with 35 additions and 16 deletions
|
|
@ -12,3 +12,4 @@ reqwest = "0.12.22"
|
|||
scraper = "0.23.1"
|
||||
regex = "1.11.1"
|
||||
chrono = "0.4.41"
|
||||
rss = "2.0.12"
|
||||
|
|
|
|||
50
src/main.rs
50
src/main.rs
|
|
@ -1,41 +1,59 @@
|
|||
use std::error::Error;
|
||||
|
||||
use chrono::NaiveDate;
|
||||
use regex::Regex;
|
||||
use reqwest::get;
|
||||
use rss::Channel;
|
||||
use scraper::{Html, Selector};
|
||||
use tokio::runtime::Runtime;
|
||||
use tokio::{runtime::Runtime, sync::mpsc::unbounded_channel};
|
||||
|
||||
/// Program entry point: drives the async fetch functions from a synchronous `main`.
// NOTE(review): this span is a rendered diff hunk, not plain source. Lines from both sides of
// the change appear together, separated by `|` / `||||` gutter rows.
fn main() {
|
||||
// Create the Tokio runtime used to run the futures below; `unwrap` panics if creation fails.
let rt = Runtime::new().unwrap();
|
||||
// NOTE(review): looks like the pre-change line. It fetches the release date for one
// hard-coded article URL and discards the returned value.
rt.block_on(fetch_release_date("https://lwn.net/Articles/1025629/"));
|
||||

||||
// NOTE(review): looks like the post-change body. It gets the list of paid article URLs, then
// looks up a release date for each one in turn (sequentially, one await per article).
rt.block_on(async {
|
||||
// Only the `Ok` case is handled: if fetching the URL list fails, nothing is printed or logged.
if let Ok(articles) = fetch_paid_article_urls().await {
|
||||
for article in articles {
|
||||
// Only `Ok(Some(date))` is handled: fetch errors and articles without a date are skipped
// silently.
if let Ok(Some(date)) = fetch_release_date(&article).await {
|
||||
// TODO: only prints the article/date pair for now; no other action is taken here.
|
||||
println!("Snooze {} to {}", article, date);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// NOTE(review): this span is a rendered diff hunk. Two versions of `fetch_release_date` are
// interleaved below, separated by `|` / `||||` gutter rows. The `Option`-returning lines with
// `unwrap()` look like the pre-change version; the `Result`-returning lines with `?` look like
// the post-change version.
//
// Pre-change signature: returns `Option<NaiveDate>` and panics (via `unwrap`) if the request
// or reading the body fails.
async fn fetch_release_date(url: &str) -> Option<NaiveDate> {
|
||||
let response = get(url).await.unwrap();
|
||||
let response_text = response.text().await.unwrap();
|
||||
/// Downloads the page at `url` and extracts the date on which the article becomes freely
/// available.
///
/// Returns `Ok(Some(date))` when the last `<p>` inside the first `div.ArticleText` element
/// contains the "(Alternatively, this item will become freely available on <date>)" notice,
/// and `Ok(None)` when the element, the paragraph, or the notice is missing.
///
/// # Errors
/// Propagates, as `Box<dyn Error>`, failures from the HTTP request, reading the body, parsing
/// the CSS selectors, compiling the regex, and parsing the captured date.
async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Error>> {
|
||||
// Fetch the page and read the whole body as text; either step's error is returned to the caller.
let response = get(url).await?.text().await?;
|
||||

||||
// Pre-change form: same lookup, but on `response_text` and with `unwrap()` on the selector.
if let Some(article_text) = Html::parse_document(&response_text)
|
||||
.select(&Selector::parse("div.ArticleText").unwrap())
|
||||
// Post-change form: parse the HTML and take the first element matching `div.ArticleText`.
if let Some(article_text) = Html::parse_document(&response)
|
||||
.select(&Selector::parse("div.ArticleText")?)
|
||||
.next()
|
||||
{
|
||||
// Pre-change form of the paragraph lookup (`unwrap()` on the selector).
if let Some(yes) = article_text.select(&Selector::parse("p").unwrap()).last() {
|
||||
// Post-change form: the notice is looked for only in the last `<p>` of the article text.
if let Some(yes) = article_text.select(&Selector::parse("p")?).last() {
|
||||
// The pattern expects a line break immediately after "freely", then optional whitespace and
// " available on", and captures the date as group 1.
// NOTE(review): `[0-9]{2}` only matches a two-digit day, so a notice written with a
// single-digit day would not match — confirm how the site formats these.
// NOTE(review): the regex is compiled on every call of this function.
let re = Regex::new(
|
||||
r#"(?m)\(Alternatively, this item will become freely\n\s* available on ([A-Z][a-z]+ [0-9]{2}, [0-9]{4})\)"#,
|
||||
)
|
||||
// Pre-change terminator: panics if the pattern is invalid.
.unwrap();
|
||||
// Post-change terminator: returns the regex error instead of panicking.
)?;
|
||||
// Match against the paragraph's inner HTML, i.e. the raw markup rather than extracted text.
if let Some(cap) = re.captures(&yes.inner_html()) {
|
||||
if let Some(date) = cap.get(1) {
|
||||
// Pre-change form: a date that fails to parse is turned into `None` by `.ok()`.
return NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y").ok();
|
||||
// Post-change form: a date that fails to parse is returned as an error.
let date = NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?;
|
||||
return Ok(Some(date));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||

||||
// Fallthrough when no notice was found: pre-change result, then post-change result.
None
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
// NOTE(review): this span is a rendered diff hunk. Two versions of the function are interleaved
// below, separated by `|` / `||||` gutter rows. `fetch_paid_articles` looks like the pre-change
// version and `fetch_paid_article_urls` like the post-change version.
//
// Pre-change version: downloads the feed text (panicking on failure via `unwrap`) and then
// returns `None` without using it.
async fn fetch_paid_articles() -> Option<Vec<String>> {
|
||||
let response = get("https://lwn.net/headlines/rss").await.unwrap();
|
||||
let response_text = response.text().await.unwrap();
|
||||
/// Downloads the RSS feed at `https://lwn.net/headlines/rss` and returns the links of the
/// items whose title starts with `[$]`.
///
/// # Errors
/// Propagates, as `Box<dyn Error>`, failures from the HTTP request, reading the body, and
/// parsing the feed.
async fn fetch_paid_article_urls() -> Result<Vec<String>, Box<dyn Error>> {
|
||||
// Read the body as raw bytes; `Channel::read_from` below is given them as a byte slice.
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
|
||||
let channel = Channel::read_from(&response[..])?;
|
||||

||||
// Pre-change result (stub).
None
|
||||
Ok(channel
|
||||
.items()
|
||||
.iter()
|
||||
// Keep items whose title begins with "[$]" — presumably the feed's marker for paid articles.
// An item without a title is treated as "" and therefore dropped.
|
||||
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
|
||||
// Drop items that have no link.
.filter_map(|i| i.link())
|
||||
// Copy each borrowed link into an owned `String` for the returned vector.
.map(|s| s.to_string())
|
||||
.collect::<Vec<String>>())
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue