|
@@ -1,7 +1,17 @@
|
|
|
use anyhow::{bail, Result};
|
|
|
use scraper;
|
|
|
use scraper::Element;
|
|
|
+use url::Url;
|
|
|
|
|
|
+/// Resolve `href` against the base `url` and return the resulting absolute URL as a string; errors if `url` fails to parse or the join fails.
|
|
|
+fn relative_to_absolute(url: &str, href: &str) -> Result<String> {
|
|
|
+ let base_url = Url::parse(url)?;
|
|
|
+ let new_url = base_url.join(href)?;
|
|
|
+ Ok(new_url.to_string())
|
|
|
+}
|
|
|
+
|
|
|
+#[allow(dead_code)]
|
|
|
+/// Find next sibling element.
|
|
|
fn next_element(element: scraper::ElementRef<'_>) -> Result<scraper::ElementRef<'_>> {
|
|
|
let next_node = element.next_sibling_element();
|
|
|
|
|
@@ -18,13 +28,10 @@ pub struct VerseOfDay {
|
|
|
}
|
|
|
|
|
|
fn element_text(element: scraper::ElementRef<'_>) -> String {
|
|
|
- let text = element.text()
|
|
|
- .map(|s| {
|
|
|
- s.trim_matches(char::is_whitespace)
|
|
|
- })
|
|
|
- .filter(|x| {
|
|
|
- !x.is_empty()
|
|
|
- })
|
|
|
+ let text = element
|
|
|
+ .text()
|
|
|
+ .map(|s| s.trim_matches(char::is_whitespace))
|
|
|
+ .filter(|x| !x.is_empty())
|
|
|
.collect::<String>();
|
|
|
text
|
|
|
}
|
|
@@ -40,7 +47,7 @@ pub fn find_vod(html: &String) -> Result<Vec<VerseOfDay>> {
|
|
|
let vod_div_select = scraper::Selector::parse("main>div>div>div>div").unwrap();
|
|
|
if let Some(vod_div) = document.select(&vod_div_select).next() {
|
|
|
// Ok, search just in this div for things of interest.
|
|
|
- /*
|
|
|
+ /*
|
|
|
// h1 text is "Verse of the Day"
|
|
|
let h1_select = scraper::Selector::parse("h1").unwrap();
|
|
|
let h1 = vod_div.select(&h1_select).next().unwrap();
|
|
@@ -51,12 +58,21 @@ pub fn find_vod(html: &String) -> Result<Vec<VerseOfDay>> {
|
|
|
// println!("p = {}", element_text(p)); // p.text().collect::<Vec<_>>());
|
|
|
|
|
|
let a_select = scraper::Selector::parse(r#"div[class~="mbs-2"]>a"#).unwrap();
|
|
|
- let mut verse_info = vod_div.select(&a_select).map(|a| element_text(a)).collect::<Vec<String>>();
|
|
|
+ let mut verse_info = vod_div
|
|
|
+ .select(&a_select)
|
|
|
+ .map(|a| element_text(a))
|
|
|
+ .collect::<Vec<String>>();
|
|
|
|
|
|
if verse_info.len() == 2 {
|
|
|
- result.push(VerseOfDay{date: element_text(p), verse:verse_info.remove(0), reference:verse_info.remove(0)});
|
|
|
+ result.push(VerseOfDay {
|
|
|
+ date: element_text(p),
|
|
|
+ verse: verse_info.remove(0),
|
|
|
+ reference: verse_info.remove(0),
|
|
|
+ });
|
|
|
+ } else {
|
|
|
+ bail!("Unable to locate today's verse. Has the HTML changed?");
|
|
|
}
|
|
|
- /*
|
|
|
+ /*
|
|
|
for a in vod_div.select(&a_select) {
|
|
|
println!("a = {}", element_text(a)); // a.text().collect::<Vec<_>>());
|
|
|
}
|
|
@@ -64,7 +80,7 @@ pub fn find_vod(html: &String) -> Result<Vec<VerseOfDay>> {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /*
|
|
|
+ /*
|
|
|
// Verse of the day is div with two a's.
|
|
|
let h1_selector = scraper::Selector::parse("div>h1").unwrap();
|
|
|
let h1 = document.select(&h1_selector).next().unwrap();
|
|
@@ -88,14 +104,21 @@ pub fn find_vod(html: &String) -> Result<Vec<VerseOfDay>> {
|
|
|
|
|
|
for prev_div in document.select(&prev_div_selector) {
|
|
|
if let Some(p) = prev_div.select(&p_selector).next() {
|
|
|
- let mut verse_info = prev_div.select(&a_selector1).map(|a| element_text(a)).collect::<Vec<String>>();
|
|
|
+ let mut verse_info = prev_div
|
|
|
+ .select(&a_selector1)
|
|
|
+ .map(|a| element_text(a))
|
|
|
+ .collect::<Vec<String>>();
|
|
|
if verse_info.len() == 2 {
|
|
|
- result.push(VerseOfDay{date: element_text(p), verse: verse_info.remove(0), reference: verse_info.remove(0)});
|
|
|
+ result.push(VerseOfDay {
|
|
|
+ date: element_text(p),
|
|
|
+ verse: verse_info.remove(0),
|
|
|
+ reference: verse_info.remove(0),
|
|
|
+ });
|
|
|
}
|
|
|
// println!("{}", element_text(p)); // p.text().collect::<Vec<_>>());
|
|
|
}
|
|
|
|
|
|
- /*
|
|
|
+ /*
|
|
|
for a in prev_div.select(&a_selector1) {
|
|
|
if let Some(href) = a.attr("href") {
|
|
|
// let text = a.text().collect::<Vec<_>>();
|
|
@@ -124,7 +147,7 @@ pub fn find_vod(html: &String) -> Result<Vec<VerseOfDay>> {
|
|
|
// bail!("More dERP!");
|
|
|
}
|
|
|
|
|
|
-pub fn find_next_chapter(html: &String) -> Result<String> {
|
|
|
+pub fn find_next_chapter(html: &String, url: &str) -> Result<String> {
|
|
|
let document = scraper::Html::parse_document(html);
|
|
|
// let a_selector = scraper::Selector::parse("div>a").unwrap();
|
|
|
// This one works: (starts with "/bible/").
|
|
@@ -144,6 +167,7 @@ pub fn find_next_chapter(html: &String) -> Result<String> {
|
|
|
|
|
|
if let Some(href) = a.attr("href") {
|
|
|
// if href.contains("/bible/") {
|
|
|
+ let href_absolute = relative_to_absolute(url, href)?;
|
|
|
|
|
|
let text = a
|
|
|
.text()
|
|
@@ -164,7 +188,7 @@ pub fn find_next_chapter(html: &String) -> Result<String> {
|
|
|
// println!("Found: [{:?}]", text[0]);
|
|
|
continue;
|
|
|
}
|
|
|
- return Ok(href.to_string());
|
|
|
+ return Ok(href_absolute);
|
|
|
|
|
|
// } else {
|
|
|
// println!("href contains: [{}]", href);
|
|
@@ -184,13 +208,13 @@ mod tests {
|
|
|
let html = String::from(
|
|
|
r#"<div class="[pointer-events:all]"><a href="/bible/59/GEN.2.ESV"><div class="flex items-center justify-center bg-white z-[5] h-[50px] w-[50px] rounded-full border-gray-15 border-small border-solid shadow-light-1 active:shadow-dark-1 active:bg-gray-5 active:border-b-gray-5"><svg width="25" height="25" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg" aria-labelledby="Next Chapter" class="text-gray-25"><title id="Next Chapter">Next Chapter</title><path fill-rule="evenodd" clip-rule="evenodd" d="M8.293 18.707a1 1 0 0 1 0-1.414l4.94-4.94a.5.5 0 0 0 0-.707l-4.94-4.939a1 1 0 0 1 1.414-1.414l5.647 5.646a1.5 1.5 0 0 1 0 2.122l-5.647 5.646a1 1 0 0 1-1.414 0Z" fill="currentColor"></path></svg></div></a></div>"#,
|
|
|
);
|
|
|
- let r = find_next_chapter(&html);
|
|
|
+ let r = find_next_chapter(&html, "https://bible.com/bible/link1");
|
|
|
if !r.is_ok() {
|
|
|
println!("DEBUG result = {:?}", r);
|
|
|
}
|
|
|
assert!(r.is_ok());
|
|
|
let link = r.unwrap();
|
|
|
- assert_eq!(link, "/bible/59/GEN.2.ESV");
|
|
|
+ assert_eq!(link, "https://bible.com/bible/59/GEN.2.ESV");
|
|
|
}
|
|
|
|
|
|
/// This tests when the HTML has been tidied.
|
|
@@ -211,12 +235,12 @@ mod tests {
|
|
|
</a>
|
|
|
</div>"#,
|
|
|
);
|
|
|
- let r = find_next_chapter(&html);
|
|
|
+ let r = find_next_chapter(&html, "https://bible.com/bible/link1");
|
|
|
if !r.is_ok() {
|
|
|
println!("DEBUG result = {:?}", r);
|
|
|
}
|
|
|
assert!(r.is_ok());
|
|
|
let link = r.unwrap();
|
|
|
- assert_eq!(link, "/bible/59/GEN.2.ESV");
|
|
|
+ assert_eq!(link, "https://bible.com/bible/59/GEN.2.ESV");
|
|
|
}
|
|
|
}
|