|
@@ -1,9 +1,6 @@
|
|
|
-use super::config;
|
|
|
-use anyhow::{bail, Context, Result};
|
|
|
+use anyhow::Result;
|
|
|
|
|
|
use scraper;
|
|
|
-use scraper::Element;
|
|
|
-use std::collections::HashSet;
|
|
|
use std::string::String;
|
|
|
|
|
|
/// Extract element text, trimmed of whitespace.
|
|
@@ -24,619 +21,3 @@ pub fn find_useragents(html: &str) -> Result<Vec<String>> {
|
|
|
}
|
|
|
Ok(result)
|
|
|
}
|
|
|
-
|
|
|
-/*
|
|
|
-/// Find newest Firefox version number.
|
|
|
-pub fn find_versions(html: &String) -> Result<String> {
|
|
|
- let document = scraper::Html::parse_document(&html);
|
|
|
- let select_a = scraper::Selector::parse("a").unwrap();
|
|
|
- let version_match = Regex::new(r#"^[0-9.]+$"#).unwrap();
|
|
|
- for a in document.select(&select_a) {
|
|
|
- let text = element_text(a);
|
|
|
-
|
|
|
- // Or: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:{}) Gecko/20100101 Firefox/{}"
|
|
|
- if version_match.is_match(&text) {
|
|
|
- return Ok(format!(
|
|
|
- "Mozilla/5.0 (X11; Linux x86_64; rv:{}) Gecko/20100101 Firefox/{}",
|
|
|
- text, text
|
|
|
- ));
|
|
|
- }
|
|
|
- }
|
|
|
- bail!("Could not locate a version string.");
|
|
|
-}
|
|
|
-
|
|
|
-#[allow(dead_code)]
|
|
|
-/// Find next sibling element.
|
|
|
-fn next_element(element: scraper::ElementRef<'_>) -> Result<scraper::ElementRef<'_>> {
|
|
|
- let next_node = element.next_sibling_element();
|
|
|
-
|
|
|
- if let Some(node) = next_node {
|
|
|
- return Ok(node);
|
|
|
- }
|
|
|
- bail!("No more elements.");
|
|
|
-}
|
|
|
-
|
|
|
-pub struct VerseOfDay {
|
|
|
- pub date: String,
|
|
|
- pub verse: String,
|
|
|
- pub reference: String,
|
|
|
-}
|
|
|
-
|
|
|
-/// Extract element text, trimmed of whitespace.
|
|
|
-fn element_text(element: scraper::ElementRef<'_>) -> String {
|
|
|
- let text = element
|
|
|
- .text()
|
|
|
- .map(|s| s.trim_matches(char::is_whitespace))
|
|
|
- .filter(|x| !x.is_empty())
|
|
|
- .collect::<String>();
|
|
|
- text
|
|
|
-}
|
|
|
-
|
|
|
-/// Extract element verse text
|
|
|
-///
|
|
|
-/// This trims the elements, (translating " " to "\n").
|
|
|
-/// Joins with a single space.
|
|
|
-#[allow(dead_code)]
|
|
|
-fn verse_element_text(element: scraper::ElementRef<'_>) -> String {
|
|
|
- let span_class = scraper::Selector::parse("span[class]").unwrap();
|
|
|
- let text: String = element
|
|
|
- .select(&span_class)
|
|
|
- .filter(|e| {
|
|
|
- if let Some(c) = e.attr("class") {
|
|
|
- return c.contains("content");
|
|
|
- }
|
|
|
- false
|
|
|
- })
|
|
|
- .map(|e| {
|
|
|
- let text: String = e.text().collect::<String>();
|
|
|
- if text == " " {
|
|
|
- return String::from("\n");
|
|
|
- } else {
|
|
|
- return text.trim().to_string();
|
|
|
- }
|
|
|
- })
|
|
|
- .collect::<Vec<String>>()
|
|
|
- .join(" ");
|
|
|
- text
|
|
|
-}
|
|
|
-
|
|
|
-pub fn find_vod(html: &String) -> Result<Vec<VerseOfDay>> {
|
|
|
- let document = scraper::Html::parse_document(&html);
|
|
|
- // let a_selector = scraper::Selector::parse(r#"div>a[href^="/bible/"]"#).unwrap();
|
|
|
- let mut result: Vec<VerseOfDay> = Vec::new();
|
|
|
-
|
|
|
- // How about this?
|
|
|
- /*
|
|
|
- This was build by looking at the structure of the HTML.
|
|
|
- What I looked for, was, something that would contain all of the items I was
|
|
|
- interested in. Select it in the Web Developer tool. When everything you
|
|
|
- want is highlighted in the browser page, that's the tag you want.
|
|
|
- In this case, it was main div div div div. Tag p contained the date.
|
|
|
- Tags a in a div[class="mbs-2"] had verse and reference.
|
|
|
- */
|
|
|
-
|
|
|
- {
|
|
|
- // Locate the Verse of the Day div tag.
|
|
|
- let vod_div_select = scraper::Selector::parse("main>div>div>div>div").unwrap();
|
|
|
- if let Some(vod_div) = document.select(&vod_div_select).next() {
|
|
|
- // Ok, search just in this div for things of interest.
|
|
|
- /*
|
|
|
- // h1 text is "Verse of the Day"
|
|
|
- let h1_select = scraper::Selector::parse("h1").unwrap();
|
|
|
- let h1 = vod_div.select(&h1_select).next().unwrap();
|
|
|
- println!("h1 = {}", element_text(h1)); //h1.text().collect::<Vec<_>>());
|
|
|
- */
|
|
|
- let p_select = scraper::Selector::parse("p").unwrap();
|
|
|
- let p = vod_div.select(&p_select).next().unwrap();
|
|
|
- // println!("p = {}", element_text(p)); // p.text().collect::<Vec<_>>());
|
|
|
-
|
|
|
- let a_select = scraper::Selector::parse(r#"div[class~="mbs-2"]>a"#).unwrap();
|
|
|
- let mut verse_info = vod_div
|
|
|
- .select(&a_select)
|
|
|
- .map(|a| element_text(a))
|
|
|
- .collect::<Vec<String>>();
|
|
|
-
|
|
|
- if verse_info.len() == 2 {
|
|
|
- result.push(VerseOfDay {
|
|
|
- date: element_text(p),
|
|
|
- verse: verse_info.remove(0),
|
|
|
- reference: verse_info.remove(0),
|
|
|
- });
|
|
|
- } else {
|
|
|
- bail!("Unable to locate today's verse. Has the HTML changed?");
|
|
|
- }
|
|
|
- /*
|
|
|
- for a in vod_div.select(&a_select) {
|
|
|
- println!("a = {}", element_text(a)); // a.text().collect::<Vec<_>>());
|
|
|
- }
|
|
|
- */
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- // Previous ones are in div[class="mlb-2"]
|
|
|
-
|
|
|
- let prev_div_selector = scraper::Selector::parse(r#"div[class="mlb-2"]"#).unwrap();
|
|
|
- let a_selector1 =
|
|
|
- scraper::Selector::parse(r#"a[href^="/bible/"][class~="no-underline"]"#).unwrap();
|
|
|
- let p_selector = scraper::Selector::parse("div>p").unwrap();
|
|
|
-
|
|
|
- println!("=====");
|
|
|
-
|
|
|
- for prev_div in document.select(&prev_div_selector) {
|
|
|
- if let Some(p) = prev_div.select(&p_selector).next() {
|
|
|
- let mut verse_info = prev_div
|
|
|
- .select(&a_selector1)
|
|
|
- .map(|a| element_text(a))
|
|
|
- .collect::<Vec<String>>();
|
|
|
- if verse_info.len() == 2 {
|
|
|
- result.push(VerseOfDay {
|
|
|
- date: element_text(p),
|
|
|
- verse: verse_info.remove(0),
|
|
|
- reference: verse_info.remove(0),
|
|
|
- });
|
|
|
- }
|
|
|
- // println!("{}", element_text(p)); // p.text().collect::<Vec<_>>());
|
|
|
- }
|
|
|
- }
|
|
|
- Ok(result)
|
|
|
-}
|
|
|
-
|
|
|
-fn parse_html_file(filename: &str) -> Result<scraper::Html> {
|
|
|
- let buffer =
|
|
|
- std::fs::read_to_string(filename).context(format!("Failed to read: {}", filename))?;
|
|
|
- Ok(scraper::Html::parse_document(&buffer))
|
|
|
-}
|
|
|
-
|
|
|
-// This shows child elements correctly.
|
|
|
-// If I could build a structure of the chapter, maybe I could parse it?
|
|
|
-// I would at least know what to look for...
|
|
|
-
|
|
|
-/// Display the structure of the HTML
|
|
|
-///
|
|
|
-/// This shows a properly indented layout of the HTML tags.
|
|
|
-/// It shows what is nested in what, and what attributes the element
|
|
|
-/// has. (And it doesn't delete empty tags like html tidy does.)
|
|
|
-pub fn show_structure(element: scraper::element_ref::ElementRef<'_>, depth: u32) {
|
|
|
- // For output formatting.
|
|
|
- let spacer = " ".repeat(depth as usize * 4);
|
|
|
-
|
|
|
- // This can be multiple classes, so watch out here.
|
|
|
- // let cls = element.attr("class").unwrap();
|
|
|
- println!(
|
|
|
- "{} {} E {} {:?}",
|
|
|
- depth,
|
|
|
- spacer,
|
|
|
- element.value().name(),
|
|
|
- element.value()
|
|
|
- );
|
|
|
-
|
|
|
- if element.has_children() {
|
|
|
- // This always seem to think there's children elements. ?!?
|
|
|
- // println!(" >>");
|
|
|
- for child in element.child_elements() {
|
|
|
- show_structure(child, depth + 1);
|
|
|
- }
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-/// Verse information
|
|
|
-#[derive(Debug)]
|
|
|
-pub enum VerseInformation {
|
|
|
- Heading(String),
|
|
|
- /// Chapter and Verse "3.16"
|
|
|
- ChapterVerse(String),
|
|
|
- Content {
|
|
|
- text: String,
|
|
|
- quoted: bool,
|
|
|
- paragraph: bool,
|
|
|
- red: bool,
|
|
|
- },
|
|
|
- /// Verse note
|
|
|
- Note(String),
|
|
|
-}
|
|
|
-
|
|
|
-pub struct WalkerParser {
|
|
|
- results: Vec<VerseInformation>,
|
|
|
- classes: HashSet<String>,
|
|
|
- paragraph: bool,
|
|
|
- chapter_verse: String,
|
|
|
-}
|
|
|
-
|
|
|
-impl WalkerParser {
|
|
|
- pub fn new() -> Self {
|
|
|
- Self {
|
|
|
- results: Vec::<VerseInformation>::new(),
|
|
|
- classes: HashSet::<String>::new(),
|
|
|
- paragraph: false,
|
|
|
- chapter_verse: String::new(),
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /// Reset the parser's internal state.
|
|
|
- pub fn clear(&mut self) {
|
|
|
- self.results.clear();
|
|
|
- self.classes.clear();
|
|
|
- self.paragraph = false;
|
|
|
- self.chapter_verse.clear();
|
|
|
- }
|
|
|
-
|
|
|
- /// Extract element text, trimmed of whitespace.
|
|
|
- fn element_text(element: scraper::ElementRef<'_>) -> String {
|
|
|
- let text = element
|
|
|
- .text()
|
|
|
- .map(|s| s.trim_matches(char::is_whitespace))
|
|
|
- .filter(|x| !x.is_empty())
|
|
|
- .collect::<String>();
|
|
|
- text
|
|
|
- }
|
|
|
-
|
|
|
- /// Clean element class, and return in a set.
|
|
|
- ///
|
|
|
- /// Classes that have __ in them are returned without the __ and ...
|
|
|
- fn clean_class(element: scraper::element_ref::ElementRef<'_>) -> HashSet<String> {
|
|
|
- let mut result = HashSet::<String>::new();
|
|
|
- if let Some(e_class) = element.attr("class") {
|
|
|
- for c in e_class.split(" ") {
|
|
|
- if let Some(chapter) = c.split_once("__") {
|
|
|
- result.insert(chapter.0.to_string());
|
|
|
- } else {
|
|
|
- result.insert(c.to_string());
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- result
|
|
|
- }
|
|
|
-
|
|
|
- /// Add note
|
|
|
- ///
|
|
|
- /// This will append to a previous note, if the last item in result
|
|
|
- /// is a VerseInformation::Note.
|
|
|
- fn add_note(&mut self, note: &str) {
|
|
|
- if let Some(last) = self.results.last_mut() {
|
|
|
- if let VerseInformation::Note(n) = last {
|
|
|
- n.push_str(" ");
|
|
|
- n.push_str(note);
|
|
|
- return;
|
|
|
- }
|
|
|
- }
|
|
|
- self.results.push(VerseInformation::Note(note.to_string()));
|
|
|
- }
|
|
|
-
|
|
|
- fn add_content(&mut self, c: VerseInformation) {
|
|
|
- if let VerseInformation::Content {
|
|
|
- text: ref c_text,
|
|
|
- quoted: c_q,
|
|
|
- paragraph: c_p,
|
|
|
- red: c_r,
|
|
|
- } = c
|
|
|
- {
|
|
|
- // I have the Content in a more usable form.
|
|
|
- let mut insert = false;
|
|
|
- if let Some(last) = self.results.last_mut() {
|
|
|
- if let VerseInformation::Content {
|
|
|
- text: l_text,
|
|
|
- quoted: l_q,
|
|
|
- paragraph: _l_p,
|
|
|
- red: l_r,
|
|
|
- } = last
|
|
|
- {
|
|
|
- if *l_q != c_q || *l_r != c_r {
|
|
|
- insert = true;
|
|
|
- }
|
|
|
- if c_p {
|
|
|
- insert = true;
|
|
|
- }
|
|
|
- // Tests are done.
|
|
|
- if !insert {
|
|
|
- l_text.push_str(" ");
|
|
|
- l_text.push_str(&c_text);
|
|
|
- return;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- self.results.push(c);
|
|
|
- } else {
|
|
|
- panic!("Expected VerseInformation::Content not {:?}", c);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /// Recursively called to handle child elements.
|
|
|
- ///
|
|
|
- /// self.classes contains the parent's classes.
|
|
|
- /// class_hash contains the current element's classes.
|
|
|
- fn recursive_walker(&mut self, element: scraper::element_ref::ElementRef<'_>) {
|
|
|
- let class_hash = Self::clean_class(element);
|
|
|
- if self.classes.contains("ChapterContent_note") {
|
|
|
- // We're in the note.
|
|
|
-
|
|
|
- if class_hash.contains("ChapterContent_body") {
|
|
|
- // Note body.
|
|
|
- let mut has_children = false;
|
|
|
- for child in element.child_elements() {
|
|
|
- has_children = true;
|
|
|
- if let Some(cl) = child.attr("class") {
|
|
|
- if cl.contains("_label__") || cl.contains("_fr__") {
|
|
|
- continue;
|
|
|
- }
|
|
|
- }
|
|
|
- let text = Self::element_text(child);
|
|
|
- if !text.is_empty() {
|
|
|
- self.add_note(&Self::element_text(child));
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if !has_children {
|
|
|
- let text = Self::element_text(element);
|
|
|
- if !text.is_empty() {
|
|
|
- self.add_note(&text);
|
|
|
- }
|
|
|
- }
|
|
|
- // Since we've handled children elements here, we're done here.
|
|
|
- return;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if class_hash.contains("ChapterContent_verse") {
|
|
|
- if let Some(ch_v) = element.attr("data-usfm") {
|
|
|
- if self.chapter_verse != ch_v {
|
|
|
- self.chapter_verse = ch_v.to_string();
|
|
|
- self.results
|
|
|
- .push(VerseInformation::ChapterVerse(ch_v.to_string()));
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if class_hash.contains("ChapterContent_content") {
|
|
|
- // Content.
|
|
|
- //
|
|
|
- // _qt__ shows up in the NIV intro.
|
|
|
-
|
|
|
- let quoted = self.classes.contains("ChapterContent_q1")
|
|
|
- || self.classes.contains("ChapterContent_q2")
|
|
|
- || self.classes.contains("ChapterContent_qt");
|
|
|
-
|
|
|
- // _wj__ seen in documents.
|
|
|
- // _wordsofchrist was in the css stylesheet, but not seen.
|
|
|
-
|
|
|
- let red = self.classes.contains("ChapterContent_wj")
|
|
|
- || self.classes.contains("ChapterContent_wordsofchrist");
|
|
|
- let text = Self::element_text(element);
|
|
|
- if !text.is_empty() {
|
|
|
- let paragraph = self.paragraph;
|
|
|
- if paragraph {
|
|
|
- self.paragraph = false;
|
|
|
- }
|
|
|
-
|
|
|
- self.add_content(VerseInformation::Content {
|
|
|
- text,
|
|
|
- quoted,
|
|
|
- paragraph,
|
|
|
- red,
|
|
|
- });
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if class_hash.contains("ChapterContent_heading") {
|
|
|
- let text = Self::element_text(element);
|
|
|
- if !text.is_empty() {
|
|
|
- self.results.push(VerseInformation::Heading(text));
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- if class_hash.contains("ChapterContent_p") {
|
|
|
- self.paragraph = true;
|
|
|
- }
|
|
|
-
|
|
|
- // Unfortunately, has_children always returns true?
|
|
|
-
|
|
|
- if element.has_children() {
|
|
|
- // Add element classes to class tracker.
|
|
|
- for element_class in class_hash.iter() {
|
|
|
- self.classes.insert(element_class.clone());
|
|
|
- }
|
|
|
-
|
|
|
- for child in element.child_elements() {
|
|
|
- self.recursive_walker(child);
|
|
|
- }
|
|
|
-
|
|
|
- for element_class in class_hash.iter() {
|
|
|
- self.classes.remove(element_class);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- /// Parse the element (and children) into VerseInformation.
|
|
|
- pub fn parse(&mut self, element: scraper::element_ref::ElementRef<'_>) -> &[VerseInformation] {
|
|
|
- self.clear();
|
|
|
- self.recursive_walker(element);
|
|
|
- self.results.as_slice()
|
|
|
- }
|
|
|
-}
|
|
|
-
|
|
|
-/// Extract just the Chapter's verses.
|
|
|
-///
|
|
|
-/// Returns Book, Chapter, and Verses
|
|
|
-pub fn extract_verses(filename: &str) -> Result<(String, u8, config::BasicVersesJSON)> {
|
|
|
- let mut result = config::BasicVersesJSON::new();
|
|
|
- let document = parse_html_file(filename)?;
|
|
|
- let h1_selector = scraper::Selector::parse("h1").unwrap();
|
|
|
- let h1 = document.select(&h1_selector).next().unwrap();
|
|
|
- let mut book = element_text(h1);
|
|
|
-
|
|
|
- // Remove chapter number from book.
|
|
|
- while book.pop() != Some(' ') {
|
|
|
- // Check for a problem.
|
|
|
- if book.is_empty() {
|
|
|
- bail!(format!(
|
|
|
- "Failed to trim the chapter from [{}].",
|
|
|
- element_text(h1)
|
|
|
- ));
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- let mut chapter_number: u8 = 0;
|
|
|
- let mut verse_number: u8 = 0;
|
|
|
- let mut walker = WalkerParser::new();
|
|
|
-
|
|
|
- // Locate the div that contains all of the chapter verses
|
|
|
- let chapter_selector = scraper::Selector::parse(r#"div[class*="_chapter__"]"#).unwrap();
|
|
|
- if let Some(chapter) = document.select(&chapter_selector).next() {
|
|
|
- // Ok, this is the chapter section.
|
|
|
-
|
|
|
- // This works amazingly well for showing how the html is structured.
|
|
|
- show_structure(chapter, 0);
|
|
|
- let results = walker.parse(chapter);
|
|
|
-
|
|
|
- println!("Elements: {:?}", results);
|
|
|
-
|
|
|
- let mut heading = String::new();
|
|
|
-
|
|
|
- for r in results {
|
|
|
- match r {
|
|
|
- VerseInformation::Heading(h) => {
|
|
|
- heading = h.clone();
|
|
|
- }
|
|
|
- VerseInformation::ChapterVerse(cv) => {
|
|
|
- let parts = cv.split(".").collect::<Vec<_>>();
|
|
|
- chapter_number = parts[1].parse().unwrap();
|
|
|
- verse_number = parts[2].parse().unwrap();
|
|
|
-
|
|
|
- if !heading.is_empty() {
|
|
|
- let v = result.verse(verse_number as usize);
|
|
|
- v.push(config::BasicVerseJSON::Heading(heading.clone()));
|
|
|
- heading.clear();
|
|
|
- }
|
|
|
- }
|
|
|
- VerseInformation::Note(n) => {
|
|
|
- if verse_number == 0 {
|
|
|
- println!("DERP! verse_number is zero! Note: {}", n);
|
|
|
- } else {
|
|
|
- let v = result.verse(verse_number as usize);
|
|
|
- v.push(config::BasicVerseJSON::Note(n.clone()));
|
|
|
- }
|
|
|
- }
|
|
|
- VerseInformation::Content {
|
|
|
- text,
|
|
|
- quoted,
|
|
|
- paragraph,
|
|
|
- red,
|
|
|
- } => {
|
|
|
- if verse_number == 0 {
|
|
|
- println!("DERP! verse_number is zero! Content: {}!", text);
|
|
|
- } else {
|
|
|
- let v = result.verse(verse_number as usize);
|
|
|
- v.push(config::BasicVerseJSON::Verse {
|
|
|
- text: text.to_string(),
|
|
|
- paragraph: *paragraph,
|
|
|
- quote: *quoted,
|
|
|
- red: *red,
|
|
|
- });
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- } else {
|
|
|
- bail!("Unable to locate the div tag with _chapter__.");
|
|
|
- }
|
|
|
-
|
|
|
- Ok((book, chapter_number, result))
|
|
|
-}
|
|
|
-
|
|
|
-pub fn find_next_chapter(html: &String) -> Result<String> {
|
|
|
- let document = scraper::Html::parse_document(html);
|
|
|
- // let a_selector = scraper::Selector::parse("div>a").unwrap();
|
|
|
- // This one works: (starts with "/bible/").
|
|
|
- // let a_selector = scraper::Selector::parse(r#"div>a[href ^= "/bible/"]"#).unwrap();
|
|
|
- // This one fails to find what we're looking for. Contains /bible/ or "bible" both fail.
|
|
|
- // Ok, using href~="/bible/" fails. It looks for the WORD "/bible/".
|
|
|
- // Using *= finds it anywhere. Ah HA!
|
|
|
- let a_selector = scraper::Selector::parse(r#"div>a[href*="/bible/"]"#).unwrap();
|
|
|
-
|
|
|
- for a in document.select(&a_selector) {
|
|
|
- if a.attr("class").is_some() {
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // Since the selector finds href containing /bible/, I don't need some of these tests now.
|
|
|
- // I still need this one, so I have the href value.
|
|
|
-
|
|
|
- if let Some(href) = a.attr("href") {
|
|
|
- // if href.contains("/bible/") {
|
|
|
- // let href_absolute = relative_to_absolute(url, href)?;
|
|
|
-
|
|
|
- let text = a
|
|
|
- .text()
|
|
|
- .map(|s| {
|
|
|
- s.trim_matches(char::is_whitespace) // &[' ', '\n', '\t'])
|
|
|
- })
|
|
|
- .filter(|x| {
|
|
|
- !x.is_empty()
|
|
|
- // x.chars().any(|c| (c != ' ') && (c != '\n'))
|
|
|
- })
|
|
|
- .collect::<Vec<_>>();
|
|
|
- // println!("TEXT: {:?}", text);
|
|
|
-
|
|
|
- if text.len() != 1 {
|
|
|
- continue;
|
|
|
- }
|
|
|
- if text[0] != "Next Chapter" {
|
|
|
- // println!("Found: [{:?}]", text[0]);
|
|
|
- continue;
|
|
|
- }
|
|
|
- return Ok(href.to_string());
|
|
|
-
|
|
|
- // } else {
|
|
|
- // println!("href contains: [{}]", href);
|
|
|
- // }
|
|
|
- }
|
|
|
- }
|
|
|
- bail!("Next Chapter not found.");
|
|
|
-}
|
|
|
-
|
|
|
-#[cfg(test)]
|
|
|
-mod tests {
|
|
|
- use super::*;
|
|
|
-
|
|
|
- /// Test HTML as given to us by the website.
|
|
|
- #[test]
|
|
|
- fn chapter_test() {
|
|
|
- let html = String::from(
|
|
|
- r#"<div class="[pointer-events:all]"><a href="/bible/59/GEN.2.ESV"><div class="flex items-center justify-center bg-white z-[5] h-[50px] w-[50px] rounded-full border-gray-15 border-small border-solid shadow-light-1 active:shadow-dark-1 active:bg-gray-5 active:border-b-gray-5"><svg width="25" height="25" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg" aria-labelledby="Next Chapter" class="text-gray-25"><title id="Next Chapter">Next Chapter</title><path fill-rule="evenodd" clip-rule="evenodd" d="M8.293 18.707a1 1 0 0 1 0-1.414l4.94-4.94a.5.5 0 0 0 0-.707l-4.94-4.939a1 1 0 0 1 1.414-1.414l5.647 5.646a1.5 1.5 0 0 1 0 2.122l-5.647 5.646a1 1 0 0 1-1.414 0Z" fill="currentColor"></path></svg></div></a></div>"#,
|
|
|
- );
|
|
|
- let r = find_next_chapter(&html);
|
|
|
- if !r.is_ok() {
|
|
|
- println!("DEBUG result = {:?}", r);
|
|
|
- }
|
|
|
- assert!(r.is_ok());
|
|
|
- let link = r.unwrap();
|
|
|
- assert_eq!(link, "/bible/59/GEN.2.ESV");
|
|
|
- }
|
|
|
-
|
|
|
- /// This tests when the HTML has been tidied.
|
|
|
- ///
|
|
|
- /// HTML has newlines and spaces added, rather then condensed.
|
|
|
- #[test]
|
|
|
- fn chapter_test_tidy() {
|
|
|
- let html = String::from(
|
|
|
- r#"<div>
|
|
|
- <a href="/bible/59/GEN.2.ESV">
|
|
|
- <div class="flex items-center justify-center bg-white z-[5] h-[50px] w-[50px] rounded-full border-gray-15 border-small border-solid shadow-light-1 active:shadow-dark-1 active:bg-gray-5 active:border-b-gray-5">
|
|
|
- <svg width="25" height="25" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg" aria-labelledby="Next Chapter" class="text-gray-25">
|
|
|
- <title id="Next Chapter">Next Chapter</title>
|
|
|
- <path fill-rule="evenodd" clip-rule="evenodd" d="M8.293 18.707a1 1 0 0 1 0-1.414l4.94-4.94a.5.5 0 0 0 0-.707l-4.94-4.939a1 1 0 0 1 1.414-1.414l5.647 5.646a1.5 1.5 0 0 1 0 2.122l-5.647 5.646a1 1 0 0 1-1.414 0Z" fill="currentColor">
|
|
|
- </path>
|
|
|
- </svg>
|
|
|
- </div>
|
|
|
- </a>
|
|
|
- </div>"#,
|
|
|
- );
|
|
|
- let r = find_next_chapter(&html);
|
|
|
- if !r.is_ok() {
|
|
|
- println!("DEBUG result = {:?}", r);
|
|
|
- }
|
|
|
- assert!(r.is_ok());
|
|
|
- let link = r.unwrap();
|
|
|
- assert_eq!(link, "/bible/59/GEN.2.ESV");
|
|
|
- }
|
|
|
-}
|
|
|
-*/
|