Add fetching of paid articles
This commit is contained in:
parent
4a20c539f6
commit
06dafb24fc
2 changed files with 35 additions and 16 deletions
|
|
@@ -12,3 +12,4 @@ reqwest = "0.12.22"
|
||||||
scraper = "0.23.1"
|
scraper = "0.23.1"
|
||||||
regex = "1.11.1"
|
regex = "1.11.1"
|
||||||
chrono = "0.4.41"
|
chrono = "0.4.41"
|
||||||
|
rss = "2.0.12"
|
||||||
|
|
|
||||||
50
src/main.rs
50
src/main.rs
|
|
@@ -1,41 +1,59 @@
|
||||||
|
use std::error::Error;
|
||||||
|
|
||||||
use chrono::NaiveDate;
|
use chrono::NaiveDate;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::get;
|
use reqwest::get;
|
||||||
|
use rss::Channel;
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
use tokio::runtime::Runtime;
|
use tokio::{runtime::Runtime, sync::mpsc::unbounded_channel};
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let rt = Runtime::new().unwrap();
|
let rt = Runtime::new().unwrap();
|
||||||
rt.block_on(fetch_release_date("https://lwn.net/Articles/1025629/"));
|
|
||||||
|
rt.block_on(async {
|
||||||
|
if let Ok(articles) = fetch_paid_article_urls().await {
|
||||||
|
for article in articles {
|
||||||
|
if let Ok(Some(date)) = fetch_release_date(&article).await {
|
||||||
|
// TODO
|
||||||
|
println!("Snooze {} to {}", article, date);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch_release_date(url: &str) -> Option<NaiveDate> {
|
async fn fetch_release_date(url: &str) -> Result<Option<NaiveDate>, Box<dyn Error>> {
|
||||||
let response = get(url).await.unwrap();
|
let response = get(url).await?.text().await?;
|
||||||
let response_text = response.text().await.unwrap();
|
|
||||||
|
|
||||||
if let Some(article_text) = Html::parse_document(&response_text)
|
if let Some(article_text) = Html::parse_document(&response)
|
||||||
.select(&Selector::parse("div.ArticleText").unwrap())
|
.select(&Selector::parse("div.ArticleText")?)
|
||||||
.next()
|
.next()
|
||||||
{
|
{
|
||||||
if let Some(yes) = article_text.select(&Selector::parse("p").unwrap()).last() {
|
if let Some(yes) = article_text.select(&Selector::parse("p")?).last() {
|
||||||
let re = Regex::new(
|
let re = Regex::new(
|
||||||
r#"(?m)\(Alternatively, this item will become freely\n\s* available on ([A-Z][a-z]+ [0-9]{2}, [0-9]{4})\)"#,
|
r#"(?m)\(Alternatively, this item will become freely\n\s* available on ([A-Z][a-z]+ [0-9]{2}, [0-9]{4})\)"#,
|
||||||
)
|
)?;
|
||||||
.unwrap();
|
|
||||||
if let Some(cap) = re.captures(&yes.inner_html()) {
|
if let Some(cap) = re.captures(&yes.inner_html()) {
|
||||||
if let Some(date) = cap.get(1) {
|
if let Some(date) = cap.get(1) {
|
||||||
return NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y").ok();
|
let date = NaiveDate::parse_from_str(date.as_str(), "%B %d, %Y")?;
|
||||||
|
return Ok(Some(date));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
None
|
Ok(None)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn fetch_paid_articles() -> Option<Vec<String>> {
|
async fn fetch_paid_article_urls() -> Result<Vec<String>, Box<dyn Error>> {
|
||||||
let response = get("https://lwn.net/headlines/rss").await.unwrap();
|
let response = get("https://lwn.net/headlines/rss").await?.bytes().await?;
|
||||||
let response_text = response.text().await.unwrap();
|
let channel = Channel::read_from(&response[..])?;
|
||||||
|
|
||||||
None
|
Ok(channel
|
||||||
|
.items()
|
||||||
|
.iter()
|
||||||
|
.filter(|i| i.title().unwrap_or("").starts_with("[$]"))
|
||||||
|
.filter_map(|i| i.link())
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.collect::<Vec<String>>())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue