Browse Source

Initial Chapter collecting.

Steve Thielemann 1 month ago
parent
commit
b8410b368c
1 changed files with 43 additions and 37 deletions
  1. 43 37
      src/main.rs

+ 43 - 37
src/main.rs

@@ -1,8 +1,6 @@
+use clap::{Parser, Subcommand};
+use core::slice::Iter;
 use reqwest;
-
-// use std::io::Write;
-// use std::io::Read;
-
 use scraper;
 use std::{
     collections::HashMap,
@@ -12,10 +10,10 @@ use std::{
     string::String,
     sync::LazyLock,
 };
-
-use clap::{Parser, Subcommand};
 use std::{thread, time::Duration};
 
+// Set up the command-line options
+
 #[derive(Parser)]
 #[command(about, long_about=None)]
 struct Cli {
@@ -44,6 +42,10 @@ enum Commands {
         /// Count
         #[arg(short, long, default_value = "5")]
         count: u32,
+
+        /// All
+        #[arg(short, long, action=clap::ArgAction::SetTrue)]
+        all: bool,
     },
     /// Test something out
     Test {},
@@ -89,6 +91,9 @@ fn find_files(base_dir: &str, bible: &str) -> Vec<String> {
             }
         }
     }
+
+    // It would be nice to sort these (by book and chapter), so they are in order.
+    // Should I just return file_names instead of path?
     result
 }
 
@@ -217,13 +222,17 @@ fn main() {
             }
             println!("I'm finished fetching!");
         }
-        Some(Commands::Extract { count}) => {
+
+        Some(Commands::Extract { count, all }) => {
             println!("Extract...");
             let mut files = find_files(cli.work.to_str().unwrap(), cli.bible.as_str());
             files.insert(0, String::from("bible/GEN.1.NIV"));
 
-            for file in files.iter().take(*count as usize) {
+            let mut chapters: HashMap<String, String> = HashMap::<String, String>::new();
+
+            let mut extractor = |file| {
                 println!("File: {}", file);
+
                 let buffer = std::fs::read_to_string(Path::new(file)).unwrap();
                 let document = scraper::Html::parse_document(&buffer);
 
@@ -249,19 +258,9 @@ fn main() {
                 for span in document.select(&span_data_usfm) {
                     // This will always be successful.
                     if let Some(data) = span.attr("data-usfm") {
+                        // There can be multiples of these with matching values.
                         println!("data-usfm {}:", data);
-                        // let mut lines = String::new();
-
-                        // let mut lines = Vec::<&str>::new();
-                        /*
-                        for ds in span.select(&span_class) {
-                            let c = ds.attr("class").unwrap();
-                            let text = ds.text().collect::<Vec<_>>();
-                            println!("ds class {} text: {:?}", c, text);
-                        }
-                        */
 
-                        // let lines : String = span.select(&span_class_content)
                         let lines: String = span
                             .select(&span_class)
                             // Only allow elements with attr class that contains "content"
@@ -292,35 +291,42 @@ fn main() {
                             })
                             .collect();
 
-                        /*
-                        .fold(String::new(), |acc, x| {
-                            format!("{:?}", x)
-                        });
-                        */
-
-                        /*
-                        for data_span in span.select(&span_selector) {
-                            if let Some(data_class) = data_span.attr("class") {
-                                if data_class.contains("content") {
-                                    let mut text = data_span.text().collect::<Vec<_>>();
-                                    println!("{} {:?}", data, text);
-                                    lines.append(&mut text);
-                                }
-                            }
-                        }
-                        */
                         println!("data {} lines {}", data, lines);
+                        if chapters.contains_key(data) {
+                            chapters.get_mut(data).unwrap().push_str(&lines);
+                        } else {
+                            chapters.insert(data.to_string(), lines);
+                        }
                     }
                 }
+            };
 
+            if *all {
+                println!("Extract All:");
+                for file in files.iter() {
+                    extractor(file);
+                }
+            } else {
+                println!("Extract {}:", *count);
+                for file in files.iter().take(*count as usize) {
+                    extractor(file);
+                }
+            }
+
+            println!("Chapters: {:?}", chapters);
+
+            /*
+            // for file in files.iter().take(*count as usize) {
+            for file in files_iter {
                 /* ESV
                 JHN.8.11 ["She said, “No one, Lord.” And Jesus said, "]
                 JHN.8.11 ["“Neither do I condemn you; go, and from now on "]
                 JHN.8.11 ["sin no more.”"]
-                JHN.8.11 ["]]"]   <- What is this?
+                JHN.8.11 ["]]"]   <- What is this?  It is in the original HTML.
                 JHN.8.11 ["  "]
                 */
             }
+            */
         }
         Some(Commands::Test {}) => {
             println!("Testing...");