123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597 |
- use anyhow::Result;
- use clap::{Parser, Subcommand};
- use reqwest;
- use scraper;
- use std::{
- collections::HashMap,
- path::{Path, PathBuf},
- string::String,
- sync::LazyLock,
- };
- use std::{thread, time::Duration};
- mod fetch;
- mod parse;
- #[derive(Parser)]
- #[command(about = "Downloads and parses Bible verses from https://www.bible.com", long_about=None, arg_required_else_help = true, after_help = "This is very specific to the website's HTML.\nIf it changes, this program might no longer work.")]
- struct Cli {
-
- #[arg(short, long, default_value = "bible")]
- work: PathBuf,
-
- #[arg(short, long, default_value = "ESV")]
- version: String,
- #[command(subcommand)]
- command: Option<Commands>,
- }
- #[derive(Subcommand)]
- enum Commands {
-
- Fetch {
-
- #[arg(short, long, default_value = "10")]
- delay: u32,
- },
-
- Extract {
-
- #[arg(short, long, default_value = "5")]
- count: u32,
-
- #[arg(short, long, action=clap::ArgAction::SetTrue)]
- all: bool,
- },
-
- Verse {
-
- #[arg(short, long, action=clap::ArgAction::SetTrue)]
- fetch: bool,
- },
-
- Test {},
- }
/// User-agent string sent with every HTTP request.
static APP_USER_AGENT: &str =
    "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0";

/// Address of the verse-of-the-day page.
static VOD_URL: &str = "https://www.bible.com/verse-of-the-day";

/// Known Bible versions, keyed by version code, each mapped to the URL of the
/// first page to download for that version.
static VERSION_URLS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    let first_pages: [(&str, &str); 5] = [
        ("ESV", "https://www.bible.com/bible/59/GEN.1.ESV"),
        ("KJV", "https://www.bible.com/bible/1/GEN.1.KJV"),
        ("NIV", "https://www.bible.com/bible/111/GEN.INTRO1.NIV"),
        ("NKJV", "https://www.bible.com/bible/114/GEN.1.NKJV"),
        ("YLT98", "https://www.bible.com/bible/821/GEN.1.YLT98"),
    ];
    first_pages.iter().copied().collect()
});
/// Three-character book codes, listed from Genesis ("GEN") to Revelation ("REV").
static BOOKS: LazyLock<Vec<&str>> = LazyLock::new(|| {
    vec![
        "GEN", "EXO", "LEV", "NUM", "DEU", "JOS", "JDG", "RUT", "1SA", "2SA", "1KI", "2KI", "1CH",
        "2CH", "EZR", "NEH", "EST", "JOB", "PSA", "PRO", "ECC", "SNG", "ISA", "JER", "LAM", "EZK",
        "DAN", "HOS", "JOL", "AMO", "OBA", "JON", "MIC", "NAM", "HAB", "ZEP", "HAG", "ZEC", "MAL",
        "MAT", "MRK", "LUK", "JHN", "ACT", "ROM", "1CO", "2CO", "GAL", "EPH", "PHP", "COL", "1TH",
        "2TH", "1TI", "2TI", "TIT", "PHM", "HEB", "JAS", "1PE", "2PE", "1JN", "2JN", "3JN", "JUD",
        "REV",
    ]
});

/// Maps each book code in `BOOKS` to its 1-based position in that list.
static BOOK_MAP: LazyLock<HashMap<&str, usize>> = LazyLock::new(|| {
    BOOKS
        .iter()
        .enumerate()
        .map(|(index, code)| (*code, index + 1))
        .collect()
});

/// Lists the chapter files for `version` inside `base_dir`, in reading order.
///
/// A file belongs to `version` when its name ends in `.<version>` (for example
/// `GEN.1.ESV`). Matching on the dot boundary keeps `KJV` from also picking up
/// `NKJV` files. The returned names are bare file names (no directory), sorted
/// by book position in `BOOKS` and then by numeric chapter.
///
/// Directory entries that cannot be read are skipped.
///
/// # Panics
///
/// Panics if `base_dir` cannot be opened as a directory.
fn find_files(base_dir: &str, version: &str) -> Vec<String> {
    let entries = std::fs::read_dir(base_dir)
        .unwrap_or_else(|err| panic!("cannot read directory {base_dir:?}: {err}"));

    let suffix = format!(".{version}");
    let mut result: Vec<String> = entries
        .filter_map(Result::ok)
        .map(|entry| entry.file_name().to_string_lossy().into_owned())
        .filter(|name| name.ends_with(suffix.as_str()))
        .collect();

    // Each key is computed once per file rather than once per comparison.
    result.sort_by_cached_key(|name| chapter_sort_key(name));
    result
}

/// Builds the `(book position, chapter number)` sort key for a chapter file name.
///
/// Unknown book codes and missing or non-numeric chapter parts (such as
/// `INTRO1`) map to `0`, so those files sort first instead of causing a panic.
fn chapter_sort_key(filename: &str) -> (usize, i32) {
    let mut parts = filename.split('.');
    let book = parts
        .next()
        .and_then(|code| BOOK_MAP.get(code))
        .copied()
        .unwrap_or(0);
    let chapter = parts
        .next()
        .and_then(|number| number.parse().ok())
        .unwrap_or(0);
    (book, chapter)
}
- fn main() -> Result<()> {
- let cli = Cli::parse();
-
-
- if !VERSION_URLS.contains_key(cli.version.as_str()) {
- println!("Sorry, I don't know about Bible Version [{}].", cli.version);
- println!("I do know about the following:");
-
- for (name, _) in VERSION_URLS.iter() {
- println!(" {}", name);
- }
- return Ok(());
- }
- match &cli.command {
- Some(Commands::Fetch { delay }) => {
- let client = reqwest::blocking::Client::builder()
- .user_agent(APP_USER_AGENT)
- .build()?;
-
- let mut url = VERSION_URLS[cli.version.as_str()].to_string();
- println!("Fetch! [{}] with delay {} secs.", cli.version, delay);
- let mut more = true;
- let mut cache_hit_once = true;
- while more {
- let result = fetch::fetch_cache(
- cli.work
- .as_os_str()
- .to_str()
- .expect("Work should be valid."),
- &client,
- url.as_str(),
- )?;
- let next_chapter = parse::find_next_chapter(&result.html, &url);
- if let Ok(next_url) = next_chapter {
-
-
-
- url = next_url;
- } else {
-
- more = false;
- }
-
- if more {
- if !result.cached {
- thread::sleep(Duration::from_secs(*delay as u64));
- } else {
- if cache_hit_once {
-
- println!("Using CACHE.");
- cache_hit_once = false;
- }
- }
- }
- }
- println!("I'm finished fetching!");
- }
- Some(Commands::Extract { count, all }) => {
- println!("Extract...");
- let files = find_files(cli.work.to_str().unwrap(), cli.version.as_str());
- let filepath = Path::new(&cli.work);
- let mut chapters: HashMap<String, String> = HashMap::<String, String>::new();
- let mut extractor = |file| {
- println!("File: {}", file);
-
- let buffer = std::fs::read_to_string(filepath.join(file)).unwrap();
- let document = scraper::Html::parse_document(&buffer);
- let h1_selector = scraper::Selector::parse("h1").unwrap();
- let h1 = document.select(&h1_selector).next().unwrap();
- println!("h1 = {:?}", h1.text().collect::<Vec<_>>());
-
-
- let span_data_usfm = scraper::Selector::parse("span[data-usfm]").unwrap();
-
- let span_class = scraper::Selector::parse("span[class]").unwrap();
-
-
-
-
- let _span_class_content =
- scraper::Selector::parse(r#"span[class="ChapterContent_content__RrUqA"]"#)
- .unwrap();
- for span in document.select(&span_data_usfm) {
-
- if let Some(data) = span.attr("data-usfm") {
-
- println!("data-usfm {}:", data);
- let lines: String = span
- .select(&span_class)
-
- .filter(|x| {
- if let Some(c) = x.attr("class") {
- if c.contains("content") {
- return true;
- }
- }
- false
- })
- .map(|x| {
-
- let init = String::new();
- let j = x.text().fold(init, |acc, x| {
-
- let mut s = acc;
- if x == " " {
-
- s.push_str("\n");
- } else {
- s.push_str(x);
- }
- s
- });
-
- j
- })
- .collect();
- println!("data {} lines {}", data, lines);
- if chapters.contains_key(data) {
- chapters.get_mut(data).unwrap().push_str(&lines);
- } else {
- chapters.insert(data.to_string(), lines);
- }
- }
- }
- };
- if *all {
- println!("Extract All:");
- for file in files.iter() {
- extractor(file);
- }
- } else {
- println!("Extract {}:", *count);
- for file in files.iter().take(*count as usize) {
- extractor(file);
- }
- }
- println!("Chapters: {:?}", chapters);
-
-
- }
- Some(Commands::Verse { fetch: _ }) => {
- let client = reqwest::blocking::Client::builder()
- .user_agent(APP_USER_AGENT)
- .build()?;
- println!("Verse of the day.");
- let result = fetch::fetch_cache(
- cli.work
- .as_os_str()
- .to_str()
- .expect("Work should be valid."),
- &client,
- VOD_URL,
- )?;
- if result.cached {
- println!("(from cache):");
- }
- for v in parse::find_vod(&result.html)? {
- println!("Date: {}", v.date);
- println!("Verse: {}", v.verse);
- println!("Ref: {}", v.reference);
- println!("------");
- };
- }
- Some(Commands::Test {}) => {
- println!("Testing...");
- let path = Path::new(&cli.work).join("GEN.1.NIV");
- let buffer = std::fs::read_to_string(path).unwrap();
- let document = scraper::Html::parse_document(&buffer);
- let span_data_usfm = scraper::Selector::parse("span[data-usfm]").unwrap();
- let _span_class = scraper::Selector::parse("span[class]").unwrap();
- let span_selector = scraper::Selector::parse("span").unwrap();
- for span in document.select(&span_data_usfm) {
- if let Some(data) = span.attr("data-usfm") {
- println!("data-usfm {}:", data);
- let mut lines = Vec::<&str>::new();
- for data_span in span.select(&span_selector) {
- if let Some(data_class) = data_span.attr("class") {
- if data_class.contains("content") {
- let mut text = data_span.text().collect::<Vec<_>>();
- println!("{} {:?}", data, text);
- lines.append(&mut text);
- }
- }
- }
- println!("data {} lines {:?}", data, lines);
- }
- }
-
- }
- None => {
- println!("I didn't see a command. Displaying help.\n");
- let _show_help: Cli = Cli::parse_from(["--help"]);
- }
- }
- Ok(())
- }
|