|
@@ -16,7 +16,7 @@ mod parse;
|
|
|
// Setup the command line options
|
|
|
|
|
|
#[derive(Parser)]
|
|
|
-#[command(about, long_about=None)]
|
|
|
+#[command(about = "Downloads and parses Bible verses from https://www.bible.com", long_about=None, arg_required_else_help = true, after_help = "This is very specific to the website's HTML.\nIf it changes, this program might no longer work.")]
|
|
|
struct Cli {
|
|
|
/// Working directory
|
|
|
#[arg(short, long, default_value = "bible")]
|
|
@@ -48,6 +48,12 @@ enum Commands {
|
|
|
#[arg(short, long, action=clap::ArgAction::SetTrue)]
|
|
|
all: bool,
|
|
|
},
|
|
|
+ /// Verse of the day
|
|
|
+ Verse {
|
|
|
+ /// Fetch new version
|
|
|
+ #[arg(short, long, action=clap::ArgAction::SetTrue)]
|
|
|
+ fetch: bool,
|
|
|
+ },
|
|
|
/// Test something out
|
|
|
Test {},
|
|
|
}
|
|
@@ -56,6 +62,7 @@ static APP_USER_AGENT: &str =
|
|
|
"Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0";
|
|
|
|
|
|
static BASE_URL: &str = "https://www.bible.com";
|
|
|
+static VOD_URL: &str = "https://www.bible.com/verse-of-the-day";
|
|
|
|
|
|
static VERSION_URLS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
|
|
|
HashMap::from([
|
|
@@ -163,6 +170,7 @@ fn main() -> Result<()> {
|
|
|
let mut url = VERSION_URLS[cli.version.as_str()].to_string();
|
|
|
println!("Fetch! [{}] with delay {} secs.", cli.version, delay);
|
|
|
let mut more = true;
|
|
|
+ let mut cache_hit_once = true;
|
|
|
|
|
|
while more {
|
|
|
let result = fetch::fetch_cache(
|
|
@@ -186,52 +194,20 @@ fn main() -> Result<()> {
|
|
|
url = next_url.to_string();
|
|
|
}
|
|
|
} else {
|
|
|
+ // We didn't find the Next Chapter link, so stop.
|
|
|
more = false;
|
|
|
}
|
|
|
|
|
|
- /*
|
|
|
- let document = scraper::Html::parse_document(&result.html);
|
|
|
-
|
|
|
- // TO FIX
|
|
|
- // We want to upgrade this to use CSS selectors.
|
|
|
- // For now, us the "working" code we have.
|
|
|
- let a_selector = scraper::Selector::parse("div>a").unwrap();
|
|
|
- for a in document.select(&a_selector) {
|
|
|
- // Skip elements with a class attribute
|
|
|
- if a.attr("class").is_some() {
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- if let Some(href) = a.attr("href") {
|
|
|
- if href.contains("/bible/") {
|
|
|
- let text = a.text().collect::<Vec<_>>();
|
|
|
-
|
|
|
- if text.len() != 1 {
|
|
|
- continue;
|
|
|
- }
|
|
|
- if text[0] != "Next Chapter" {
|
|
|
- continue;
|
|
|
- }
|
|
|
-
|
|
|
- // Ok! We've found the Next Chapter a element!
|
|
|
- if href.starts_with("/") {
|
|
|
- url = String::from(BASE_URL) + href;
|
|
|
- } else {
|
|
|
- url = href.to_string();
|
|
|
- }
|
|
|
-
|
|
|
- // println!("Found HREF: {} => {}", href, url);
|
|
|
- // panic!("Squirrel alert!");
|
|
|
- more = true;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- */
|
|
|
-
|
|
|
+ // If there's more to do, add a delay between requests.
|
|
|
if more {
|
|
|
if !result.cached {
|
|
|
thread::sleep(Duration::from_secs(*delay as u64));
|
|
|
+ } else {
|
|
|
+ if cache_hit_once {
|
|
|
+ // Display this message only once.
|
|
|
+ println!("Using CACHE.");
|
|
|
+ cache_hit_once = false;
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -337,19 +313,31 @@ fn main() -> Result<()> {
|
|
|
"AMO.8.9": "“And on that day,” declares the Lord God,\n“I will make the sun go down at noonand darken the earth in broad daylight.\n"}
|
|
|
^ noonand ? Shouldn't that be "noon and"? Check original. Original has a break between them. Check merge routine.
|
|
|
*/
|
|
|
- /*
|
|
|
- // for file in files.iter().take(*count as usize) {
|
|
|
- for file in files_iter {
|
|
|
- /* ESV
|
|
|
- JHN.8.11 ["She said, “No one, Lord.” And Jesus said, "]
|
|
|
- JHN.8.11 ["“Neither do I condemn you; go, and from now on "]
|
|
|
- JHN.8.11 ["sin no more.”"]
|
|
|
- JHN.8.11 ["]]"] <- What is this? It is the the original HTML.
|
|
|
- JHN.8.11 [" "]
|
|
|
- */
|
|
|
- }
|
|
|
+ /* ESV
|
|
|
+ JHN.8.11 ["She said, “No one, Lord.” And Jesus said, "]
|
|
|
+ JHN.8.11 ["“Neither do I condemn you; go, and from now on "]
|
|
|
+ JHN.8.11 ["sin no more.”"]
|
|
|
+ JHN.8.11 ["]]"] <- What is this? It is the original HTML.
|
|
|
+ JHN.8.11 [" "]
|
|
|
*/
|
|
|
}
|
|
|
+
|
|
|
+ Some(Commands::Verse{ fetch}) => {
|
|
|
+ let client = reqwest::blocking::Client::builder()
|
|
|
+ .user_agent(APP_USER_AGENT)
|
|
|
+ .build()?;
|
|
|
+
|
|
|
+ println!("Verse of the day.");
|
|
|
+ let result = fetch::fetch_cache(cli.work
|
|
|
+ .as_os_str()
|
|
|
+ .to_str()
|
|
|
+ .expect("Work should be valid."), &client, VOD_URL)?;
|
|
|
+ if result.cached {
|
|
|
+ println!("(from cache):");
|
|
|
+ }
|
|
|
+ let _v = parse::find_vod(&result.html);
|
|
|
+
|
|
|
+ }
|
|
|
Some(Commands::Test {}) => {
|
|
|
println!("Testing...");
|
|
|
|
|
@@ -396,8 +384,8 @@ fn main() -> Result<()> {
|
|
|
*/
|
|
|
}
|
|
|
None => {
|
|
|
- println!("Looking for FETCH or EXTRACT");
|
|
|
- println!("I've got nothing to do here...");
|
|
|
+ println!("I didn't see a command. Displaying help.\n");
|
|
|
+ let _show_help : Cli = Cli::parse_from(["--help"]);
|
|
|
}
|
|
|
}
|
|
|
|