Browse Source

Added user agent updater.

Steve Thielemann 1 month ago
parent
commit
507c90360f
6 changed files with 189 additions and 41 deletions
  1. 41 0
      Cargo.lock
  2. 3 0
      Cargo.toml
  3. 11 0
      initial.config
  4. 24 1
      src/fetch.rs
  5. 81 27
      src/main.rs
  6. 29 13
      src/parse.rs

+ 41 - 0
Cargo.lock

@@ -17,6 +17,15 @@ version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
 
+[[package]]
+name = "aho-corasick"
+version = "1.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "anstream"
 version = "0.6.18"
@@ -1133,6 +1142,35 @@ dependencies = [
  "bitflags",
 ]
 
+[[package]]
+name = "regex"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
+
 [[package]]
 name = "reqwest"
 version = "0.12.12"
@@ -1674,8 +1712,11 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "clap",
+ "regex",
  "reqwest",
  "scraper",
+ "serde",
+ "serde_json",
  "url",
 ]
 

+ 3 - 0
Cargo.toml

@@ -6,6 +6,9 @@ edition = "2021"
 [dependencies]
 anyhow = "1.0.95"
 clap = { version = "4.5.27", features = ["derive"] }
+regex = "1.11.1"
 reqwest = { version = "0.12.12", features = ["blocking"] }
 scraper = "0.22.0"
+serde = { version = "1.0.217", features = ["derive"] }
+serde_json = "1.0.138"
 url = "2.5.4"

+ 11 - 0
initial.config

@@ -0,0 +1,11 @@
+{
+  "user_agent": "Mozilla/5.0 (X11; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0",
+  "versions": {
+    "NASB2020": "https://www.bible.com/bible/2692/GEN.1.NASB2020",
+    "NKJV": "https://www.bible.com/bible/114/GEN.1.NKJV",
+    "YLT98": "https://www.bible.com/bible/821/GEN.1.YLT98",
+    "KJV": "https://www.bible.com/bible/1/GEN.1.KJV",
+    "ESV": "https://www.bible.com/bible/59/GEN.1.ESV",
+    "NIV": "https://www.bible.com/bible/111/GEN.INTRO1.NIV"
+  }
+}

+ 24 - 1
src/fetch.rs

@@ -1,11 +1,34 @@
-use anyhow::{Context, Result};
+use anyhow::{Context, Result, bail};
 use std::{fs::File, io::Write, path::Path};
+use url::Url;
+use super::parse;
+
+/// Convert relative to absolute
+pub fn relative_to_absolute(url: &str, href: &str) -> Result<String> {
+    let base_url = Url::parse(url)?;
+    let new_url = base_url.join(href)?;
+    Ok(new_url.to_string())
+}
 
 pub struct FetchResult {
     pub cached: bool,
     pub html: String,
 }
 
+pub fn agent_update(user_agent: &str) -> Result<String> {
+    let client = reqwest::blocking::Client::builder()
+    .user_agent(user_agent)
+    .build()?;
+
+    let result = fetch(&client, "https://www.mozilla.org/en-US/firefox/releases/")?;
+    if let Ok(v) = parse::find_versions(&result) {
+        if v != user_agent {
+            return Ok(v);
+        }
+    }
+    bail!("No user_agent updates.");
+}
+
 #[allow(dead_code)]
 // Should this always fetch it/save?
 pub fn fetch(client: &reqwest::blocking::Client, url: &str) -> Result<String> {

+ 81 - 27
src/main.rs

@@ -10,6 +10,7 @@ use std::{
 };
 use std::{thread, time::Duration};
 
+mod config;
 mod fetch;
 mod parse;
 
@@ -26,6 +27,10 @@ struct Cli {
     #[arg(short, long, default_value = "ESV")]
     version: String,
 
+    /// User Agent
+    #[arg(short, long, action=clap::ArgAction::SetTrue)]
+    agent_update: bool,
+
     #[command(subcommand)]
     command: Option<Commands>,
 }
@@ -58,14 +63,19 @@ enum Commands {
     Test {},
 }
 
+const CONFIG_FILE : &str = "app.config";
+
+/*
 static APP_USER_AGENT: &str =
-    "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0";
+    "Mozilla/5.0 (X11; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0";
+*/
 
 // Not needed, I process relative URLs correctly now.
 // static BASE_URL: &str = "https://www.bible.com";
 
 static VOD_URL: &str = "https://www.bible.com/verse-of-the-day";
 
+/*
 static VERSION_URLS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
     HashMap::from([
         ("ESV", "https://www.bible.com/bible/59/GEN.1.ESV"),
@@ -76,6 +86,7 @@ static VERSION_URLS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
         ("YLT98", "https://www.bible.com/bible/821/GEN.1.YLT98"),
     ])
 });
+*/
 
 static BOOKS: LazyLock<Vec<&str>> = LazyLock::new(|| {
     Vec::from([
@@ -148,10 +159,35 @@ fn find_files(base_dir: &str, version: &str) -> Vec<String> {
 // https://www.bible.com/verse-of-the-day
 
 fn main() -> Result<()> {
+    let mut config = config::read_config(CONFIG_FILE)?;
+
     let cli = Cli::parse();
     // println!("Work Dir: {:?}", cli.work);
     // println!("Bible: {:?}", cli.bible);
 
+    if !config.versions.contains_key(cli.version.as_str()) {
+        println!("Sorry, I don't know about Bible Version [{}].", cli.version);
+        println!("I do know about the following:");
+
+        // Keys in arbitrary order.
+        for (name, _) in config.versions.iter() {
+            println!("  {}", name);
+        }
+        return Ok(());
+    }
+
+    if cli.agent_update {
+        if let Ok(new_agent) = fetch::agent_update(&config.user_agent) {
+            config.user_agent = new_agent;
+            println!("User agent now {}", &config.user_agent);
+            config::write_config(CONFIG_FILE, &config)?;
+            return Ok(());
+        } else {
+            println!("User agent OK.");
+        }
+    }
+
+    /*
     if !VERSION_URLS.contains_key(cli.version.as_str()) {
         println!("Sorry, I don't know about Bible Version [{}].", cli.version);
         println!("I do know about the following:");
@@ -162,14 +198,15 @@ fn main() -> Result<()> {
         }
         return Ok(());
     }
+    */
 
     match &cli.command {
         Some(Commands::Fetch { delay }) => {
             let client = reqwest::blocking::Client::builder()
-                .user_agent(APP_USER_AGENT)
+                .user_agent(&config.user_agent)
                 .build()?;
             // .unwrap();
-            let mut url = VERSION_URLS[cli.version.as_str()].to_string();
+            let mut url = config.versions[cli.version.as_str()].to_string();
             println!("Fetch! [{}] with delay {} secs.", cli.version, delay);
             let mut more = true;
             let mut cache_hit_once = true;
@@ -184,20 +221,22 @@ fn main() -> Result<()> {
                     url.as_str(),
                 )?;
 
-                let next_chapter = parse::find_next_chapter(&result.html, &url);
+                let next_chapter = parse::find_next_chapter(&result.html);
 
                 if let Ok(next_url) = next_chapter {
                     // Ok!  We have something
                     // more = true;
 
-                    /* 
+                    /*
                     if next_url.starts_with("/") {
                         url = String::from(BASE_URL) + &next_url;
                     } else {
                         url = next_url.to_string();
                     }
                     */
-                    url = next_url;
+                    if let Ok(abs_url) = fetch::relative_to_absolute(&url, &next_url) {
+                        url = abs_url;
+                    }
                 } else {
                     // We didn't find the Next Chapter link, so stop.
                     more = false;
@@ -329,7 +368,7 @@ fn main() -> Result<()> {
 
         Some(Commands::Verse { fetch: _ }) => {
             let client = reqwest::blocking::Client::builder()
-                .user_agent(APP_USER_AGENT)
+                .user_agent(&config.user_agent)
                 .build()?;
 
             println!("Verse of the day.");
@@ -349,33 +388,48 @@ fn main() -> Result<()> {
                 println!("Verse: {}", v.verse);
                 println!("Ref:   {}", v.reference);
                 println!("------");
-            };
+            }
         }
         Some(Commands::Test {}) => {
             println!("Testing...");
+            let client = reqwest::blocking::Client::builder()
+                .user_agent(&config.user_agent)
+                .build()?;
+
+            // They are using react.  There's a token request, which allows them to fetch the daily reading...
+            let odb = fetch::fetch(&client, "https://www.odbm.org/");
+            // See the .har file for more details.
+
+            if let Ok(html) = odb {
+                println!("{}", html);
+            } else {
+                println!("Fetch error: {:?}", odb.unwrap_err());
+            }
+
+            if false {
+                let path = Path::new(&cli.work).join("GEN.1.NIV");
+                let buffer = std::fs::read_to_string(path).unwrap();
+                let document = scraper::Html::parse_document(&buffer);
+
+                let span_data_usfm = scraper::Selector::parse("span[data-usfm]").unwrap();
+                let _span_class = scraper::Selector::parse("span[class]").unwrap();
+                let span_selector = scraper::Selector::parse("span").unwrap();
 
-            let path = Path::new(&cli.work).join("GEN.1.NIV");
-            let buffer = std::fs::read_to_string(path).unwrap();
-            let document = scraper::Html::parse_document(&buffer);
-
-            let span_data_usfm = scraper::Selector::parse("span[data-usfm]").unwrap();
-            let _span_class = scraper::Selector::parse("span[class]").unwrap();
-            let span_selector = scraper::Selector::parse("span").unwrap();
-
-            for span in document.select(&span_data_usfm) {
-                if let Some(data) = span.attr("data-usfm") {
-                    println!("data-usfm {}:", data);
-                    let mut lines = Vec::<&str>::new();
-                    for data_span in span.select(&span_selector) {
-                        if let Some(data_class) = data_span.attr("class") {
-                            if data_class.contains("content") {
-                                let mut text = data_span.text().collect::<Vec<_>>();
-                                println!("{} {:?}", data, text);
-                                lines.append(&mut text);
+                for span in document.select(&span_data_usfm) {
+                    if let Some(data) = span.attr("data-usfm") {
+                        println!("data-usfm {}:", data);
+                        let mut lines = Vec::<&str>::new();
+                        for data_span in span.select(&span_selector) {
+                            if let Some(data_class) = data_span.attr("class") {
+                                if data_class.contains("content") {
+                                    let mut text = data_span.text().collect::<Vec<_>>();
+                                    println!("{} {:?}", data, text);
+                                    lines.append(&mut text);
+                                }
                             }
                         }
+                        println!("data {} lines {:?}", data, lines);
                     }
-                    println!("data {} lines {:?}", data, lines);
                 }
             }
             /*

+ 29 - 13
src/parse.rs

@@ -1,13 +1,20 @@
 use anyhow::{bail, Result};
 use scraper;
 use scraper::Element;
-use url::Url;
+use regex::Regex;
 
-/// Convert relate to absolute
-fn relative_to_absolute(url: &str, href: &str) -> Result<String> {
-    let base_url = Url::parse(url)?;
-    let new_url = base_url.join(href)?;
-    Ok(new_url.to_string())
+pub fn find_versions(html: &String) -> Result<String> {
+    let document = scraper::Html::parse_document(&html);
+    let select_a = scraper::Selector::parse("a").unwrap();
+    let version_match = Regex::new(r#"^[0-9.]+$"#).unwrap();
+    for a in document.select(&select_a) {
+        let text = element_text(a);
+
+        if version_match.is_match(&text) {
+            return Ok(format!("Mozilla/5.0 (X11; Linux x86_64; rv:{}) Gecko/20100101 Firefox/{}", text, text));
+        }
+    }
+    bail!("Could not locate a version string.");
 }
 
 #[allow(dead_code)]
@@ -42,6 +49,15 @@ pub fn find_vod(html: &String) -> Result<Vec<VerseOfDay>> {
     let mut result: Vec<VerseOfDay> = Vec::new();
 
     // How about this?
+    /*
+    This was built by looking at the structure of the HTML.
+    What I looked for was something that would contain all of the items I was
+    interested in.  Select it in the Web Developer tool. When everything you
+    want is highlighted in the browser page, that's the tag you want.
+    In this case, it was main div div div div. Tag p contained the date.
+    Tags a in a div[class="mbs-2"] had verse and reference.
+     */
+
     {
         // Locate the Verse of the Day div tag.
         let vod_div_select = scraper::Selector::parse("main>div>div>div>div").unwrap();
@@ -147,7 +163,7 @@ pub fn find_vod(html: &String) -> Result<Vec<VerseOfDay>> {
     // bail!("More dERP!");
 }
 
-pub fn find_next_chapter(html: &String, url: &str) -> Result<String> {
+pub fn find_next_chapter(html: &String) -> Result<String> {
     let document = scraper::Html::parse_document(html);
     // let a_selector = scraper::Selector::parse("div>a").unwrap();
     // This one works:  (starts with "/bible/").
@@ -167,7 +183,7 @@ pub fn find_next_chapter(html: &String, url: &str) -> Result<String> {
 
         if let Some(href) = a.attr("href") {
             //    if href.contains("/bible/") {
-            let href_absolute = relative_to_absolute(url, href)?;
+            // let href_absolute = relative_to_absolute(url, href)?;
 
             let text = a
                 .text()
@@ -188,7 +204,7 @@ pub fn find_next_chapter(html: &String, url: &str) -> Result<String> {
                 // println!("Found: [{:?}]", text[0]);
                 continue;
             }
-            return Ok(href_absolute);
+            return Ok(href.to_string());
 
             //    } else {
             //        println!("href contains: [{}]", href);
@@ -208,13 +224,13 @@ mod tests {
         let html = String::from(
             r#"<div class="[pointer-events:all]"><a href="/bible/59/GEN.2.ESV"><div class="flex items-center justify-center bg-white z-[5] h-[50px] w-[50px] rounded-full border-gray-15 border-small border-solid shadow-light-1 active:shadow-dark-1 active:bg-gray-5 active:border-b-gray-5"><svg width="25" height="25" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg" aria-labelledby="Next Chapter" class="text-gray-25"><title id="Next Chapter">Next Chapter</title><path fill-rule="evenodd" clip-rule="evenodd" d="M8.293 18.707a1 1 0 0 1 0-1.414l4.94-4.94a.5.5 0 0 0 0-.707l-4.94-4.939a1 1 0 0 1 1.414-1.414l5.647 5.646a1.5 1.5 0 0 1 0 2.122l-5.647 5.646a1 1 0 0 1-1.414 0Z" fill="currentColor"></path></svg></div></a></div>"#,
         );
-        let r = find_next_chapter(&html, "https://bible.com/bible/link1");
+        let r = find_next_chapter(&html);
         if !r.is_ok() {
             println!("DEBUG result = {:?}", r);
         }
         assert!(r.is_ok());
         let link = r.unwrap();
-        assert_eq!(link, "https://bible.com/bible/59/GEN.2.ESV");
+        assert_eq!(link, "/bible/59/GEN.2.ESV");
     }
 
     /// This tests when the HTML has been tidied.
@@ -235,12 +251,12 @@ mod tests {
         </a>
         </div>"#,
         );
-        let r = find_next_chapter(&html, "https://bible.com/bible/link1");
+        let r = find_next_chapter(&html);
         if !r.is_ok() {
             println!("DEBUG result = {:?}", r);
         }
         assert!(r.is_ok());
         let link = r.unwrap();
-        assert_eq!(link, "https://bible.com/bible/59/GEN.2.ESV");
+        assert_eq!(link, "/bible/59/GEN.2.ESV");
     }
 }