|
@@ -40,7 +40,11 @@ enum Commands {
|
|
|
delay: u32,
|
|
|
},
|
|
|
/// Extract information from cached files
|
|
|
- Extract {},
|
|
|
+ Extract {
|
|
|
+ /// Count
|
|
|
+ #[arg(short, long, default_value = "5")]
|
|
|
+ count: u32,
|
|
|
+ },
|
|
|
/// Test something out
|
|
|
Test {},
|
|
|
}
|
|
@@ -213,10 +217,12 @@ fn main() {
|
|
|
}
|
|
|
println!("I'm finished fetching!");
|
|
|
}
|
|
|
- Some(Commands::Extract {}) => {
|
|
|
+ Some(Commands::Extract { count}) => {
|
|
|
println!("Extract...");
|
|
|
- let files = find_files(cli.work.to_str().unwrap(), cli.bible.as_str());
|
|
|
- for file in files.iter().take(5) {
|
|
|
+ let mut files = find_files(cli.work.to_str().unwrap(), cli.bible.as_str());
|
|
|
+ files.insert(0, String::from("bible/GEN.1.NIV"));
|
|
|
+
|
|
|
+ for file in files.iter().take(*count as usize) {
|
|
|
println!("File: {}", file);
|
|
|
let buffer = std::fs::read_to_string(Path::new(file)).unwrap();
|
|
|
let document = scraper::Html::parse_document(&buffer);
|
|
@@ -225,11 +231,74 @@ fn main() {
|
|
|
let h1 = document.select(&h1_selector).next().unwrap();
|
|
|
println!("h1 = {:?}", h1.text().collect::<Vec<_>>());
|
|
|
|
|
|
- let span_selector = scraper::Selector::parse("span").unwrap();
|
|
|
- for span in document.select(&span_selector) {
|
|
|
+ // https://programmersportal.com/the-complete-css-selectors-cheat-sheet-with-examples-and-pdf/
|
|
|
+
|
|
|
+ // let span_selector = scraper::Selector::parse("span").unwrap();
|
|
|
+ let span_data_usfm = scraper::Selector::parse("span[data-usfm]").unwrap();
|
|
|
+ // parse r#"div>a[href ^="/bible/"]"#
|
|
|
+ let span_class = scraper::Selector::parse("span[class]").unwrap();
|
|
|
+ // span[class="ChapterContent_content__RrUqA"]
|
|
|
+ // let span_class_content = scraper::Selector::parse(r#"span[class~="content"]"#).unwrap();
|
|
|
+ // OK! ~= probably locates a matching attr line <span class="this that content"> but does not
|
|
|
+ // match <span class="contains_content">!
|
|
|
+
|
|
|
+ let span_class_content =
|
|
|
+ scraper::Selector::parse(r#"span[class="ChapterContent_content__RrUqA"]"#)
|
|
|
+ .unwrap();
|
|
|
+
|
|
|
+ for span in document.select(&span_data_usfm) {
|
|
|
+ // This will always be successful.
|
|
|
if let Some(data) = span.attr("data-usfm") {
|
|
|
- println!("{}:", data);
|
|
|
- let mut lines = Vec::<&str>::new();
|
|
|
+ println!("data-usfm {}:", data);
|
|
|
+ // let mut lines = String::new();
|
|
|
+
|
|
|
+ // let mut lines = Vec::<&str>::new();
|
|
|
+ /*
|
|
|
+ for ds in span.select(&span_class) {
|
|
|
+ let c = ds.attr("class").unwrap();
|
|
|
+ let text = ds.text().collect::<Vec<_>>();
|
|
|
+ println!("ds class {} text: {:?}", c, text);
|
|
|
+ }
|
|
|
+ */
|
|
|
+
|
|
|
+ // let lines : String = span.select(&span_class_content)
|
|
|
+ let lines: String = span
|
|
|
+ .select(&span_class)
|
|
|
+ // Only allow elements with attr class that contains "content"
|
|
|
+ .filter(|x| {
|
|
|
+ if let Some(c) = x.attr("class") {
|
|
|
+ if c.contains("content") {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ false
|
|
|
+ })
|
|
|
+ .map(|x| {
|
|
|
+ // Convert element's text() iterator into a string.
|
|
|
+ let init = String::new();
|
|
|
+ let j = x.text().fold(init, |acc, x| {
|
|
|
+ // print!( ">> {}<< ", x);
|
|
|
+ let mut s = acc;
|
|
|
+ if x == " " {
|
|
|
+ // This would be a break/newline.
|
|
|
+ s.push_str("\n");
|
|
|
+ } else {
|
|
|
+ s.push_str(x);
|
|
|
+ }
|
|
|
+ s
|
|
|
+ });
|
|
|
+ // println!("j = {}", j);
|
|
|
+ j
|
|
|
+ })
|
|
|
+ .collect();
|
|
|
+
|
|
|
+ /*
|
|
|
+ .fold(String::new(), |acc, x| {
|
|
|
+ format!("{:?}", x)
|
|
|
+ });
|
|
|
+ */
|
|
|
+
|
|
|
+ /*
|
|
|
for data_span in span.select(&span_selector) {
|
|
|
if let Some(data_class) = data_span.attr("class") {
|
|
|
if data_class.contains("content") {
|
|
@@ -239,11 +308,12 @@ fn main() {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- println!("{} {:?}", data, lines);
|
|
|
+ */
|
|
|
+ println!("data {} lines {}", data, lines);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- /* ESV
|
|
|
+ /* ESV
|
|
|
JHN.8.11 ["She said, “No one, Lord.” And Jesus said, "]
|
|
|
JHN.8.11 ["“Neither do I condemn you; go, and from now on "]
|
|
|
JHN.8.11 ["sin no more.”"]
|
|
@@ -254,11 +324,33 @@ fn main() {
|
|
|
}
|
|
|
Some(Commands::Test {}) => {
|
|
|
println!("Testing...");
|
|
|
- // Test finding div>a[href^="/bible/"]
|
|
|
+
|
|
|
let path = Path::new("bible").join("GEN.1.NIV");
|
|
|
let buffer = std::fs::read_to_string(path).unwrap();
|
|
|
let document = scraper::Html::parse_document(&buffer);
|
|
|
|
|
|
+ let span_data_usfm = scraper::Selector::parse("span[data-usfm]").unwrap();
|
|
|
+ let _span_class = scraper::Selector::parse("span[class]").unwrap();
|
|
|
+ let span_selector = scraper::Selector::parse("span").unwrap();
|
|
|
+
|
|
|
+ for span in document.select(&span_data_usfm) {
|
|
|
+ if let Some(data) = span.attr("data-usfm") {
|
|
|
+ println!("data-usfm {}:", data);
|
|
|
+ let mut lines = Vec::<&str>::new();
|
|
|
+ for data_span in span.select(&span_selector) {
|
|
|
+ if let Some(data_class) = data_span.attr("class") {
|
|
|
+ if data_class.contains("content") {
|
|
|
+ let mut text = data_span.text().collect::<Vec<_>>();
|
|
|
+ println!("{} {:?}", data, text);
|
|
|
+ lines.append(&mut text);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ println!("data {} lines {:?}", data, lines);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ /*
|
|
|
+ // Test finding div>a[href^="/bible/"]
|
|
|
// let a_selector = scraper::Selector::parse("div>a").unwrap();
|
|
|
let a_selector = scraper::Selector::parse(r#"div>a[href ^="/bible/"]"#).unwrap();
|
|
|
|
|
@@ -273,6 +365,7 @@ fn main() {
|
|
|
}
|
|
|
println!("=====");
|
|
|
}
|
|
|
+ */
|
|
|
}
|
|
|
None => {
|
|
|
println!("Looking for FETCH or EXTRACT");
|