Browse Source

Working Verse of the Day parser.

Steve Thielemann 1 month ago
parent
commit
4dc68eb3d0
2 changed files with 64 additions and 21 deletions
  1. 18 10
      src/main.rs
  2. 46 11
      src/parse.rs

+ 18 - 10
src/main.rs

@@ -322,21 +322,29 @@ fn main() -> Result<()> {
             */
         }
 
-        Some(Commands::Verse{ fetch}) => {
+        Some(Commands::Verse { fetch }) => {
             let client = reqwest::blocking::Client::builder()
-            .user_agent(APP_USER_AGENT)
-            .build()?;
+                .user_agent(APP_USER_AGENT)
+                .build()?;
 
             println!("Verse of the day.");
-            let result = fetch::fetch_cache(cli.work
-                .as_os_str()
-                .to_str()
-                .expect("Work should be valid."), &client, VOD_URL)?;
+            let result = fetch::fetch_cache(
+                cli.work
+                    .as_os_str()
+                    .to_str()
+                    .expect("Work should be valid."),
+                &client,
+                VOD_URL,
+            )?;
             if result.cached {
                 println!("(from cache):");
             }
-            let _v = parse::find_vod(&result.html);
-
+            for v in parse::find_vod(&result.html)? {
+                println!("Date:  {}", v.date);
+                println!("Verse: {}", v.verse);
+                println!("Ref:   {}", v.reference);
+                println!("------");
+            };
         }
         Some(Commands::Test {}) => {
             println!("Testing...");
@@ -385,7 +393,7 @@ fn main() -> Result<()> {
         }
         None => {
             println!("I didn't see a command.  Displaying help.\n");
-            let _show_help : Cli = Cli::parse_from(["--help"]);
+            let _show_help: Cli = Cli::parse_from(["--help"]);
         }
     }
 

+ 46 - 11
src/parse.rs

@@ -11,27 +11,56 @@ fn next_element(element: scraper::ElementRef<'_>) -> Result<scraper::ElementRef<
     bail!("No more elements.");
 }
 
-pub fn find_vod(html: &String) -> Result<String> {
+/// One verse-of-the-day entry collected by `find_vod`.
+pub struct VerseOfDay {
+    /// Text of the first `<p>` element found in the entry's div (printed as "Date:" by `main`).
+    pub date: String,
+    /// Text of the first matching `<a>` link in the entry (printed as "Verse:").
+    pub verse: String,
+    /// Text of the second matching `<a>` link in the entry (printed as "Ref:").
+    pub reference: String,
+}
+
+/// Returns the text of `element` with each text node trimmed of surrounding
+/// whitespace; nodes that are empty after trimming are dropped, and the
+/// remaining pieces are concatenated with no separator between them.
+fn element_text(element: scraper::ElementRef<'_>) -> String {
+    element
+        .text()
+        // `str::trim` strips the same characters as `char::is_whitespace`.
+        .map(str::trim)
+        .filter(|piece| !piece.is_empty())
+        .collect()
+}
+
+pub fn find_vod(html: &String) -> Result<Vec<VerseOfDay>> {
     let document = scraper::Html::parse_document(&html);
     // let a_selector = scraper::Selector::parse(r#"div>a[href^="/bible/"]"#).unwrap();
-    let mut result: Vec<(String, String)> = Vec::new();
+    let mut result: Vec<VerseOfDay> = Vec::new();
 
     // How about this?
     {
+        // Locate the Verse of the Day div tag.
         let vod_div_select = scraper::Selector::parse("main>div>div>div>div").unwrap();
         if let Some(vod_div) = document.select(&vod_div_select).next() {
             // Ok, search just in this div for things of interest.
+            /* 
+            // h1 text is "Verse of the Day"
             let h1_select = scraper::Selector::parse("h1").unwrap();
             let h1 = vod_div.select(&h1_select).next().unwrap();
-            println!("h1 = {:?}", h1.text().collect::<Vec<_>>());
+            println!("h1 = {}", element_text(h1)); //h1.text().collect::<Vec<_>>());
+            */
             let p_select = scraper::Selector::parse("p").unwrap();
             let p = vod_div.select(&p_select).next().unwrap();
-            println!("p = {:?}", p.text().collect::<Vec<_>>());
+            // println!("p = {}", element_text(p)); // p.text().collect::<Vec<_>>());
 
             let a_select = scraper::Selector::parse(r#"div[class~="mbs-2"]>a"#).unwrap();
+            let mut verse_info = vod_div.select(&a_select).map(|a| element_text(a)).collect::<Vec<String>>();
+
+            if verse_info.len() == 2 {
+                result.push(VerseOfDay{date: element_text(p), verse:verse_info.remove(0), reference:verse_info.remove(0)});
+            }
+            /* 
             for a in vod_div.select(&a_select) {
-                println!("a = {:?}", a.text().collect::<Vec<_>>());
+                println!("a = {}", element_text(a)); // a.text().collect::<Vec<_>>());
             }
+            */
         }
     }
 
@@ -56,21 +85,26 @@ pub fn find_vod(html: &String) -> Result<String> {
     let p_selector = scraper::Selector::parse("div>p").unwrap();
 
     println!("=====");
-    
+
     for prev_div in document.select(&prev_div_selector) {
         if let Some(p) = prev_div.select(&p_selector).next() {
-            println!("{:?}", p.text().collect::<Vec<_>>());
+            let mut verse_info = prev_div.select(&a_selector1).map(|a| element_text(a)).collect::<Vec<String>>();
+            if verse_info.len() == 2 {
+                result.push(VerseOfDay{date: element_text(p), verse: verse_info.remove(0), reference: verse_info.remove(0)});
+            }
+            // println!("{}", element_text(p)); // p.text().collect::<Vec<_>>());
         }
 
-        let mut last_verse = String::new();
+        /* 
         for a in prev_div.select(&a_selector1) {
             if let Some(href) = a.attr("href") {
-                let text = a.text().collect::<Vec<_>>();
-                println!("{:?}", text);
+                // let text = a.text().collect::<Vec<_>>();
+                println!("{}", element_text(a)); // text);
                 // println!("html: {}", a.html());
             };
         }
         println!("-----");
+        */
     }
 
     /*
@@ -86,7 +120,8 @@ pub fn find_vod(html: &String) -> Result<String> {
         };
     }
     */
-    bail!("More dERP!");
+    Ok(result)
+    // bail!("More dERP!");
 }
 
 pub fn find_next_chapter(html: &String) -> Result<String> {