|
@@ -1,5 +1,6 @@
|
|
|
use anyhow::Result; // , Context};
|
|
|
use clap::{Parser, Subcommand};
|
|
|
+use config::save_basic_json;
|
|
|
use reqwest;
|
|
|
use scraper;
|
|
|
use std::{
|
|
@@ -46,12 +47,12 @@ enum Commands {
|
|
|
/// Extract information from cached files
|
|
|
Extract {
|
|
|
/// Count
|
|
|
- #[arg(short, long, default_value = "5")]
|
|
|
- count: u32,
|
|
|
+ #[arg(short, long)] // , default_value = "5")]
|
|
|
+ count: Option<u32>,
|
|
|
|
|
|
- /// All
|
|
|
- #[arg(short, long, action=clap::ArgAction::SetTrue)]
|
|
|
- all: bool,
|
|
|
+ /// Output file
|
|
|
+ #[arg(short, long)]
|
|
|
+ output: Option<PathBuf>,
|
|
|
},
|
|
|
/// Verse of the day
|
|
|
Verse {
|
|
@@ -64,7 +65,7 @@ enum Commands {
|
|
|
}
|
|
|
|
|
|
/// Configuration filename
|
|
|
-const CONFIG_FILE : &str = "app.config";
|
|
|
+const CONFIG_FILE: &str = "app.config";
|
|
|
|
|
|
/// Verse of the Day URL
|
|
|
static VOD_URL: &str = "https://www.bible.com/verse-of-the-day";
|
|
@@ -80,6 +81,77 @@ static BOOKS: LazyLock<Vec<&str>> = LazyLock::new(|| {
|
|
|
])
|
|
|
});
|
|
|
|
|
|
+static BOOK_NAMES: LazyLock<Vec<&str>> = LazyLock::new(|| {
|
|
|
+ Vec::from([
|
|
|
+ "Genesis",
|
|
|
+ "Exodus",
|
|
|
+ "Leviticus",
|
|
|
+ "Numbers",
|
|
|
+ "Deuteronomy",
|
|
|
+ "Joshua",
|
|
|
+ "Judges",
|
|
|
+ "Ruth",
|
|
|
+ "1 Samuel",
|
|
|
+ "2 Samuel",
|
|
|
+ "1 Kings",
|
|
|
+ "2 Kings",
|
|
|
+ "1 Chronicles",
|
|
|
+ "2 Chronicles",
|
|
|
+ "Ezra",
|
|
|
+ "Nehemiah",
|
|
|
+ "Esther",
|
|
|
+ "Job",
|
|
|
+ "Psalm",
|
|
|
+ "Proverbs",
|
|
|
+ "Ecclesiastes",
|
|
|
+ "Song of Solomon",
|
|
|
+ "Isaiah",
|
|
|
+ "Jeremiah",
|
|
|
+ "Lamentations",
|
|
|
+ "Ezekiel",
|
|
|
+ "Daniel",
|
|
|
+ "Hosea",
|
|
|
+ "Joel",
|
|
|
+ "Amos",
|
|
|
+ "Obadiah",
|
|
|
+ "Jonah",
|
|
|
+ "Micah",
|
|
|
+ "Nahum",
|
|
|
+ "Habakkuk",
|
|
|
+ "Zephaniah",
|
|
|
+ "Haggai",
|
|
|
+ "Zechariah",
|
|
|
+ "Malachi",
|
|
|
+ "Matthew",
|
|
|
+ "Mark",
|
|
|
+ "Luke",
|
|
|
+ "John",
|
|
|
+ "Acts",
|
|
|
+ "Romans",
|
|
|
+ "1 Corinthians",
|
|
|
+ "2 Corinthians",
|
|
|
+ "Galatians",
|
|
|
+ "Ephesians",
|
|
|
+ "Philippians",
|
|
|
+ "Colossians",
|
|
|
+ "1 Thessalonians",
|
|
|
+ "2 Thessalonians",
|
|
|
+ "1 Timothy",
|
|
|
+ "2 Timothy",
|
|
|
+ "Titus",
|
|
|
+ "Philemon",
|
|
|
+ "Hebrews",
|
|
|
+ "James",
|
|
|
+ "1 Peter",
|
|
|
+ "2 Peter",
|
|
|
+ "1 John",
|
|
|
+ "2 John",
|
|
|
+ "3 John",
|
|
|
+ "Jude",
|
|
|
+ "Revelation",
|
|
|
+ ])
|
|
|
+});
|
|
|
+
|
|
|
static BOOK_MAP: LazyLock<HashMap<&str, usize>> =
|
|
|
LazyLock::new(|| HashMap::from_iter(BOOKS.iter().enumerate().map(|x| (*x.1, x.0 + 1))));
|
|
|
|
|
@@ -214,102 +286,82 @@ fn main() -> Result<()> {
|
|
|
println!("I'm finished fetching!");
|
|
|
}
|
|
|
|
|
|
- Some(Commands::Extract { count, all }) => {
|
|
|
+ Some(Commands::Extract { count, output }) => {
|
|
|
println!("Extract...");
|
|
|
let files = find_files(cli.work.to_str().unwrap(), cli.version.as_str());
|
|
|
let filepath = Path::new(&cli.work);
|
|
|
|
|
|
- let mut chapters: HashMap<String, String> = HashMap::<String, String>::new();
|
|
|
+ // let mut chapters: HashMap<String, String> = HashMap::<String, String>::new();
|
|
|
+ let mut json_output = config::BasicJSON {
|
|
|
+ books: Vec::new(),
|
|
|
+ book: HashMap::new(),
|
|
|
+ };
|
|
|
|
|
|
let mut extractor = |file| {
|
|
|
println!("File: {}", file);
|
|
|
- println!("BV: {:?}", parse::extract_basic_verses(filepath.join(file).to_str().unwrap()));
|
|
|
- println!("----->");
|
|
|
- /*
|
|
|
- let mut filepath = cli.work.clone();
|
|
|
- filepath = filepath.join(file);
|
|
|
- */
|
|
|
- let buffer = std::fs::read_to_string(filepath.join(file)).unwrap();
|
|
|
- let document = scraper::Html::parse_document(&buffer);
|
|
|
-
|
|
|
- let h1_selector = scraper::Selector::parse("h1").unwrap();
|
|
|
- let h1 = document.select(&h1_selector).next().unwrap();
|
|
|
- println!("h1 = {:?}", h1.text().collect::<Vec<_>>());
|
|
|
-
|
|
|
- // https://programmersportal.com/the-complete-css-selectors-cheat-sheet-with-examples-and-pdf/
|
|
|
-
|
|
|
- // let span_selector = scraper::Selector::parse("span").unwrap();
|
|
|
- let span_data_usfm = scraper::Selector::parse("span[data-usfm]").unwrap();
|
|
|
- // parse r#"div>a[href ^="/bible/"]"#
|
|
|
- let span_class = scraper::Selector::parse("span[class]").unwrap();
|
|
|
- // span[class="ChapterContent_content__RrUqA"]
|
|
|
- // let span_class_content = scraper::Selector::parse(r#"span[class~="content"]"#).unwrap();
|
|
|
- // OK! ~= probably locates a matching attr line <span class="this that content"> but does not
|
|
|
- // match <span class="contains_content">!
|
|
|
+ let bv =
|
|
|
+ parse::extract_basic_verses(filepath.join(file).to_str().unwrap()).unwrap();
|
|
|
+ // println!("BV: {:?}", bv);
|
|
|
+ for bv_item in bv {
|
|
|
+ if !json_output.book.contains_key(&bv_item.book) {
|
|
|
+ // book missing
|
|
|
+ json_output.book.insert(
|
|
|
+ bv_item.book.clone(),
|
|
|
+ config::BasicChaptersJSON {
|
|
|
+ chapters: Vec::new(),
|
|
|
+ },
|
|
|
+ );
|
|
|
+ json_output.books.push(bv_item.book.clone());
|
|
|
+ }
|
|
|
+ // Book exists now.
|
|
|
|
|
|
- let _span_class_content =
|
|
|
- scraper::Selector::parse(r#"span[class="ChapterContent_content__RrUqA"]"#)
|
|
|
- .unwrap();
|
|
|
+ let chapter = bv_item.chapter;
|
|
|
+ let verse = bv_item.verse;
|
|
|
|
|
|
- for span in document.select(&span_data_usfm) {
|
|
|
- // This will always be successful.
|
|
|
- if let Some(data) = span.attr("data-usfm") {
|
|
|
- // There can be multiples of these with matching values.
|
|
|
- println!("data-usfm {}:", data);
|
|
|
+ if let Some(mbook) = json_output.book.get_mut(&bv_item.book) {
|
|
|
+ while chapter as usize > mbook.chapters.len() {
|
|
|
+ // Chapter is missing.
|
|
|
+ mbook
|
|
|
+ .chapters
|
|
|
+ .push(config::BasicVersesJSON { verses: Vec::new() });
|
|
|
+ }
|
|
|
|
|
|
- let lines: String = span
|
|
|
- .select(&span_class)
|
|
|
- // Only allow elements with attr class that containts "content"
|
|
|
- .filter(|x| {
|
|
|
- if let Some(c) = x.attr("class") {
|
|
|
- if c.contains("content") {
|
|
|
- return true;
|
|
|
- }
|
|
|
- }
|
|
|
- false
|
|
|
- })
|
|
|
- .map(|x| {
|
|
|
- // Convert element's text() iterator into a string.
|
|
|
- let init = String::new();
|
|
|
- let j = x.text().fold(init, |acc, x| {
|
|
|
- // print!( ">> {}<< ", x);
|
|
|
- let mut s = acc;
|
|
|
- if x == " " {
|
|
|
- // This would be a break/newline.
|
|
|
- s.push_str("\n");
|
|
|
- } else {
|
|
|
- s.push_str(x);
|
|
|
- }
|
|
|
- s
|
|
|
- });
|
|
|
- // println!("j = {}", j);
|
|
|
- j
|
|
|
- })
|
|
|
- .collect();
|
|
|
-
|
|
|
- println!("data {} lines {}", data, lines);
|
|
|
- if chapters.contains_key(data) {
|
|
|
- chapters.get_mut(data).unwrap().push_str(&lines);
|
|
|
- } else {
|
|
|
- chapters.insert(data.to_string(), lines);
|
|
|
+ let mverses = &mut mbook.chapters[chapter as usize - 1].verses;
|
|
|
+ while verse as usize > mverses.len() {
|
|
|
+ mverses.push(String::new());
|
|
|
}
|
|
|
+ mverses[verse as usize - 1] = bv_item.text;
|
|
|
}
|
|
|
}
|
|
|
};
|
|
|
|
|
|
- if *all {
|
|
|
- println!("Extract All:");
|
|
|
- for file in files.iter() {
|
|
|
+ if let Some(count) = *count {
|
|
|
+ // Ok, they gave us a value. Use it.
|
|
|
+ println!("Extract {}:", count);
|
|
|
+ for file in files.iter().take(count as usize) {
|
|
|
extractor(file);
|
|
|
}
|
|
|
} else {
|
|
|
- println!("Extract {}:", *count);
|
|
|
- for file in files.iter().take(*count as usize) {
|
|
|
+ println!("Extract All:");
|
|
|
+ for file in files.iter() {
|
|
|
extractor(file);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- println!("Chapters: {:?}", chapters);
|
|
|
+ if let Some(output) = output {
|
|
|
+ // Ok, they gave us a file to use.
|
|
|
+ println!("Saving output: {}", output.to_str().unwrap());
|
|
|
+ save_basic_json(output.to_str().unwrap(), &json_output)?;
|
|
|
+ }
|
|
|
+
|
|
|
+ // println!("Chapters: {:?}", chapters);
|
|
|
+
|
|
|
+ /*
|
|
|
+ // What happened here?
|
|
|
+
|
|
|
+ jq .book.John.chapters[2].verses[15] < test.json
|
|
|
+ "\n “For \n \n God so loved \n \n the world, \n \n \n that he gave his only Son, that whoever believes in him should not \n \n perish but have eternal life. \n"
|
|
|
+ */
|
|
|
|
|
|
/*
|
|
|
"AMO.8.9": "“And on that day,” declares the Lord God,\n“I will make the sun go down at noonand darken the earth in broad daylight.\n"}
|