|
@@ -1,8 +1,6 @@
|
|
|
+use clap::{Parser, Subcommand};
|
|
|
+use core::slice::Iter;
|
|
|
use reqwest;
|
|
|
-
|
|
|
-// use std::io::Write;
|
|
|
-// use std::io::Read;
|
|
|
-
|
|
|
use scraper;
|
|
|
use std::{
|
|
|
collections::HashMap,
|
|
@@ -12,10 +10,10 @@ use std::{
|
|
|
string::String,
|
|
|
sync::LazyLock,
|
|
|
};
|
|
|
-
|
|
|
-use clap::{Parser, Subcommand};
|
|
|
use std::{thread, time::Duration};
|
|
|
|
|
|
+// Set up the command line options
|
|
|
+
|
|
|
#[derive(Parser)]
|
|
|
#[command(about, long_about=None)]
|
|
|
struct Cli {
|
|
@@ -44,6 +42,10 @@ enum Commands {
|
|
|
/// Count
|
|
|
#[arg(short, long, default_value = "5")]
|
|
|
count: u32,
|
|
|
+
|
|
|
+ /// All
|
|
|
+ #[arg(short, long, action=clap::ArgAction::SetTrue)]
|
|
|
+ all: bool,
|
|
|
},
|
|
|
/// Test something out
|
|
|
Test {},
|
|
@@ -89,6 +91,9 @@ fn find_files(base_dir: &str, bible: &str) -> Vec<String> {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+ // It would be nice to sort these (by book and chapter), so they are in order.
|
|
|
+ // Should I just return file_names instead of path?
|
|
|
result
|
|
|
}
|
|
|
|
|
@@ -217,13 +222,17 @@ fn main() {
|
|
|
}
|
|
|
println!("I'm finished fetching!");
|
|
|
}
|
|
|
- Some(Commands::Extract { count}) => {
|
|
|
+
|
|
|
+ Some(Commands::Extract { count, all }) => {
|
|
|
println!("Extract...");
|
|
|
let mut files = find_files(cli.work.to_str().unwrap(), cli.bible.as_str());
|
|
|
files.insert(0, String::from("bible/GEN.1.NIV"));
|
|
|
|
|
|
- for file in files.iter().take(*count as usize) {
|
|
|
+ let mut chapters: HashMap<String, String> = HashMap::<String, String>::new();
|
|
|
+
|
|
|
+ let mut extractor = |file| {
|
|
|
println!("File: {}", file);
|
|
|
+
|
|
|
let buffer = std::fs::read_to_string(Path::new(file)).unwrap();
|
|
|
let document = scraper::Html::parse_document(&buffer);
|
|
|
|
|
@@ -249,19 +258,9 @@ fn main() {
|
|
|
for span in document.select(&span_data_usfm) {
|
|
|
// This will always be successful.
|
|
|
if let Some(data) = span.attr("data-usfm") {
|
|
|
+ // There can be multiples of these with matching values.
|
|
|
println!("data-usfm {}:", data);
|
|
|
- // let mut lines = String::new();
|
|
|
-
|
|
|
- // let mut lines = Vec::<&str>::new();
|
|
|
- /*
|
|
|
- for ds in span.select(&span_class) {
|
|
|
- let c = ds.attr("class").unwrap();
|
|
|
- let text = ds.text().collect::<Vec<_>>();
|
|
|
- println!("ds class {} text: {:?}", c, text);
|
|
|
- }
|
|
|
- */
|
|
|
|
|
|
- // let lines : String = span.select(&span_class_content)
|
|
|
let lines: String = span
|
|
|
.select(&span_class)
|
|
|
// Only allow elements with attr class that containts "content"
|
|
@@ -292,35 +291,42 @@ fn main() {
|
|
|
})
|
|
|
.collect();
|
|
|
|
|
|
- /*
|
|
|
- .fold(String::new(), |acc, x| {
|
|
|
- format!("{:?}", x)
|
|
|
- });
|
|
|
- */
|
|
|
-
|
|
|
- /*
|
|
|
- for data_span in span.select(&span_selector) {
|
|
|
- if let Some(data_class) = data_span.attr("class") {
|
|
|
- if data_class.contains("content") {
|
|
|
- let mut text = data_span.text().collect::<Vec<_>>();
|
|
|
- println!("{} {:?}", data, text);
|
|
|
- lines.append(&mut text);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- */
|
|
|
println!("data {} lines {}", data, lines);
|
|
|
+ if chapters.contains_key(data) {
|
|
|
+ chapters.get_mut(data).unwrap().push_str(&lines);
|
|
|
+ } else {
|
|
|
+ chapters.insert(data.to_string(), lines);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
+ };
|
|
|
|
|
|
+ if *all {
|
|
|
+ println!("Extract All:");
|
|
|
+ for file in files.iter() {
|
|
|
+ extractor(file);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ println!("Extract {}:", *count);
|
|
|
+ for file in files.iter().take(*count as usize) {
|
|
|
+ extractor(file);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ println!("Chapters: {:?}", chapters);
|
|
|
+
|
|
|
+ /*
|
|
|
+ // for file in files.iter().take(*count as usize) {
|
|
|
+ for file in files_iter {
|
|
|
/* ESV
|
|
|
JHN.8.11 ["She said, “No one, Lord.” And Jesus said, "]
|
|
|
JHN.8.11 ["“Neither do I condemn you; go, and from now on "]
|
|
|
JHN.8.11 ["sin no more.”"]
|
|
|
- JHN.8.11 ["]]"] <- What is this?
|
|
|
+ JHN.8.11 ["]]"] <- What is this? It is in the original HTML.
|
|
|
JHN.8.11 [" "]
|
|
|
*/
|
|
|
}
|
|
|
+ */
|
|
|
}
|
|
|
Some(Commands::Test {}) => {
|
|
|
println!("Testing...");
|