|
@@ -9,6 +9,7 @@ use std::{
|
|
|
fs::File,
|
|
|
io::Write,
|
|
|
path::{Path, PathBuf},
|
|
|
+ string::String,
|
|
|
sync::LazyLock,
|
|
|
};
|
|
|
|
|
@@ -47,15 +48,46 @@ enum Commands {
|
|
|
/// User-Agent header sent with every HTTP request made by the client.
static APP_USER_AGENT: &str =
    "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0";

/// Scheme and host prepended to site-relative `href` values (those starting with `/`).
static BASE_URL: &str = "https://www.bible.com";

/// URL the fetch command starts from, keyed by Bible version abbreviation.
static VERSION_URLS: LazyLock<HashMap<&str, &str>> = LazyLock::new(|| {
    let start_pages = [
        ("ESV", "https://www.bible.com/bible/59/GEN.1.ESV"),
        ("KJV", "https://www.bible.com/bible/1/GEN.1.KJV"),
        // NIV starts at the Genesis introduction page rather than chapter 1.
        // Previous starting point: https://www.bible.com/bible/111/GEN.1.NIV
        ("NIV", "https://www.bible.com/bible/111/GEN.INTRO1.NIV"),
        ("YLT98", "https://www.bible.com/bible/821/GEN.1.YLT98"),
    ];
    start_pages.into_iter().collect()
});
|
|
|
|
|
|
/// Three-letter book codes, Genesis through Revelation, in canonical order.
///
/// These are the same codes that appear as the book component of chapter
/// identifiers such as `GEN.1.ESV`.
static BOOKS: LazyLock<Vec<&str>> = LazyLock::new(|| {
    vec![
        // Old Testament
        "GEN", "EXO", "LEV", "NUM", "DEU", "JOS", "JDG", "RUT", "1SA", "2SA",
        "1KI", "2KI", "1CH", "2CH", "EZR", "NEH", "EST", "JOB", "PSA", "PRO",
        "ECC", "SNG", "ISA", "JER", "LAM", "EZK", "DAN", "HOS", "JOL", "AMO",
        "OBA", "JON", "MIC", "NAM", "HAB", "ZEP", "HAG", "ZEC", "MAL",
        // New Testament
        "MAT", "MRK", "LUK", "JHN", "ACT", "ROM", "1CO", "2CO", "GAL", "EPH",
        "PHP", "COL", "1TH", "2TH", "1TI", "2TI", "TIT", "PHM", "HEB", "JAS",
        "1PE", "2PE", "1JN", "2JN", "3JN", "JUD", "REV",
    ]
});
|
|
|
+
|
|
|
/// Returns the paths of the regular files directly inside `base_dir` whose
/// file name ends with `bible`.
///
/// Each returned string is the entry's path (`base_dir` joined with the file
/// name), converted lossily to UTF-8. The result is sorted, because
/// `read_dir` yields entries in a platform-dependent order and callers that
/// only process the first few files would otherwise see a different subset
/// from run to run.
///
/// Directory entries that cannot be read are skipped. Entries that are not
/// regular files (for example a sub-directory whose name happens to end with
/// `bible`) are skipped as well, since callers read every returned path as a
/// file.
///
/// # Panics
///
/// Panics if `base_dir` cannot be opened as a directory.
fn find_files(base_dir: &str, bible: &str) -> Vec<String> {
    let entries = std::fs::read_dir(base_dir)
        .unwrap_or_else(|err| panic!("cannot read directory {base_dir:?}: {err}"));

    let mut result: Vec<String> = entries
        .flatten()
        .filter(|entry| entry.file_name().to_string_lossy().ends_with(bible))
        .map(|entry| entry.path())
        // `is_file` follows symlinks, so links to saved pages still count.
        .filter(|path| path.is_file())
        .map(|path| path.to_string_lossy().into_owned())
        .collect();

    result.sort_unstable();
    result
}
|
|
|
+
|
|
|
// Start here
|
|
|
// static URL: &str = "https://www.bible.com/bible/59/PSA.95.ESV";
|
|
|
// "https://www.bible.com/bible/59/GEN.1.ESV";
|
|
@@ -120,13 +152,13 @@ fn main() {
|
|
|
}
|
|
|
return;
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
match &cli.command {
|
|
|
Some(Commands::Fetch { delay }) => {
|
|
|
let client = reqwest::blocking::Client::builder()
|
|
|
- .user_agent(APP_USER_AGENT)
|
|
|
- .build()
|
|
|
- .unwrap();
|
|
|
+ .user_agent(APP_USER_AGENT)
|
|
|
+ .build()
|
|
|
+ .unwrap();
|
|
|
let mut url = VERSION_URLS[cli.bible.as_str()].to_string();
|
|
|
println!("Fetch! [{}] with delay {} secs.", cli.bible, delay);
|
|
|
let mut more = true;
|
|
@@ -142,7 +174,6 @@ fn main() {
|
|
|
// For now, use the "working" code we have.
|
|
|
let a_selector = scraper::Selector::parse("div>a").unwrap();
|
|
|
for a in document.select(&a_selector) {
|
|
|
-
|
|
|
// Skip elements with a class attribute
|
|
|
if a.attr("class").is_some() {
|
|
|
continue;
|
|
@@ -161,7 +192,7 @@ fn main() {
|
|
|
|
|
|
// Ok! We've found the Next Chapter a element!
|
|
|
if href.starts_with("/") {
|
|
|
- url = String::from("https://www.bible.com") + href;
|
|
|
+ url = String::from(BASE_URL) + href;
|
|
|
} else {
|
|
|
url = href.to_string();
|
|
|
}
|
|
@@ -169,7 +200,7 @@ fn main() {
|
|
|
// println!("Found HREF: {} => {}", href, url);
|
|
|
// panic!("Squirrel alert!");
|
|
|
more = true;
|
|
|
- break
|
|
|
+ break;
|
|
|
}
|
|
|
}
|
|
|
}
|
|
@@ -184,9 +215,64 @@ fn main() {
|
|
|
}
|
|
|
Some(Commands::Extract {}) => {
|
|
|
println!("Extract...");
|
|
|
+ let files = find_files(cli.work.to_str().unwrap(), cli.bible.as_str());
|
|
|
+ for file in files.iter().take(5) {
|
|
|
+ println!("File: {}", file);
|
|
|
+ let buffer = std::fs::read_to_string(Path::new(file)).unwrap();
|
|
|
+ let document = scraper::Html::parse_document(&buffer);
|
|
|
+
|
|
|
+ let h1_selector = scraper::Selector::parse("h1").unwrap();
|
|
|
+ let h1 = document.select(&h1_selector).next().unwrap();
|
|
|
+ println!("h1 = {:?}", h1.text().collect::<Vec<_>>());
|
|
|
+
|
|
|
+ let span_selector = scraper::Selector::parse("span").unwrap();
|
|
|
+ for span in document.select(&span_selector) {
|
|
|
+ if let Some(data) = span.attr("data-usfm") {
|
|
|
+ println!("{}:", data);
|
|
|
+ let mut lines = Vec::<&str>::new();
|
|
|
+ for data_span in span.select(&span_selector) {
|
|
|
+ if let Some(data_class) = data_span.attr("class") {
|
|
|
+ if data_class.contains("content") {
|
|
|
+ let mut text = data_span.text().collect::<Vec<_>>();
|
|
|
+ println!("{} {:?}", data, text);
|
|
|
+ lines.append(&mut text);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ println!("{} {:?}", data, lines);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* ESV
|
|
|
+ JHN.8.11 ["She said, “No one, Lord.” And Jesus said, "]
|
|
|
+ JHN.8.11 ["“Neither do I condemn you; go, and from now on "]
|
|
|
+ JHN.8.11 ["sin no more.”"]
|
|
|
+ JHN.8.11 ["]]"] <- What is this?
|
|
|
+ JHN.8.11 [" "]
|
|
|
+ */
|
|
|
+ }
|
|
|
}
|
|
|
Some(Commands::Test {}) => {
|
|
|
println!("Testing...");
|
|
|
+ // Test finding div>a[href^="/bible/"]
|
|
|
+ let path = Path::new("bible").join("GEN.1.NIV");
|
|
|
+ let buffer = std::fs::read_to_string(path).unwrap();
|
|
|
+ let document = scraper::Html::parse_document(&buffer);
|
|
|
+
|
|
|
+ // let a_selector = scraper::Selector::parse("div>a").unwrap();
|
|
|
+ let a_selector = scraper::Selector::parse(r#"div>a[href ^="/bible/"]"#).unwrap();
|
|
|
+
|
|
|
+ // This reduces down the number of items to check (from ~40 to 4)!
|
|
|
+ // And invalidates the check for /bible/ !
|
|
|
+
|
|
|
+ for a in document.select(&a_selector) {
|
|
|
+ let text = a.text().collect::<Vec<_>>();
|
|
|
+ println!("text: {:?}", text);
|
|
|
+ if let Some(href) = a.attr("href") {
|
|
|
+ println!("href = {}", href);
|
|
|
+ }
|
|
|
+ println!("=====");
|
|
|
+ }
|
|
|
}
|
|
|
None => {
|
|
|
println!("Looking for FETCH or EXTRACT");
|
|
@@ -197,7 +283,7 @@ fn main() {
|
|
|
/*
|
|
|
return;
|
|
|
|
|
|
-
|
|
|
+
|
|
|
let client = reqwest::blocking::Client::builder()
|
|
|
.user_agent(APP_USER_AGENT)
|
|
|
.build()
|
|
@@ -328,7 +414,6 @@ fn main() {
|
|
|
|
|
|
// let res = client.get("https://www.bible.com/bible/59/GEN.1.ESV").send().unwrap();
|
|
|
*/
|
|
|
-
|
|
|
}
|
|
|
|
|
|
/*
|