Browse Source

Trying new strategy for parsing.

I am missing the _p__, _q1__, and _q2__ processing sections.
Steve Thielemann 1 month ago
parent
commit
9fbbf47380
4 changed files with 619 additions and 35 deletions
  1. 71 3
      src/config.rs
  2. 10 14
      src/fetch.rs
  3. 66 16
      src/main.rs
  4. 472 2
      src/parse.rs

+ 71 - 3
src/config.rs

@@ -31,17 +31,85 @@ pub struct BasicJSON {
     pub book: HashMap<String, BasicChaptersJSON>,
 }
 
+impl BasicJSON {
+    pub fn new() -> Self {
+        Self {
+            books: Vec::new(),
+            book: HashMap::new(),
+        }
+    }
+
+    /// Get the book named `name`, adding an empty one first if it is missing; returns a mutable reference to it.
+    pub fn new_book(&mut self, name: &String) -> &mut BasicChaptersJSON {
+        if ! self.book.contains_key(name) {
+            self.book.insert(name.clone(), BasicChaptersJSON::new());
+        }
+        self.book.get_mut(name).unwrap()
+    }
+
+    pub fn add_to_books(&mut self, name: &str) {
+        self.books.push(name.to_string());
+    }
+}
+
 #[derive(Serialize, Deserialize)]
 pub struct BasicChaptersJSON {
     pub chapters: Vec<BasicVersesJSON>,
 }
 
-#[derive(Serialize, Deserialize)]
+impl BasicChaptersJSON {
+    pub fn new() -> Self {
+        Self {
+            chapters: Vec::new()
+        }
+    }
+
+    pub fn new_chapter(&mut self, index: usize) -> &mut BasicVersesJSON {
+        while self.chapters.len() < index {
+            self.chapters.push(BasicVersesJSON::new());
+        }
+        self.chapters.get_mut(index-1).unwrap()
+    }
+}
+
+#[derive(Serialize, Deserialize, Debug)]
 pub struct BasicVersesJSON {
-    pub verses: Vec<String>,
+    pub verses: Vec<BasicVerseJSON>,
+}
+
+impl BasicVersesJSON {
+    pub fn new() -> Self {
+        Self {
+            verses: Vec::new()
+        }
+    }
+
+    pub fn new_verse(&mut self, index: usize) -> &mut BasicVerseJSON {
+        while self.verses.len() < index {
+            self.verses.push(BasicVerseJSON::new());
+        }
+        self.verses.get_mut(index-1).unwrap()
+    }
 }
 
-/* 
+#[derive(Serialize, Deserialize, Debug)]
+pub struct BasicVerseJSON {
+    pub start_paragraph: bool,
+    pub heading: Option<String>,
+    pub verse: String,
+}
+
+impl BasicVerseJSON {
+    pub fn new() -> Self {
+        Self {
+            start_paragraph: false,
+            heading: None,
+            verse: String::new(),
+        }
+    }
+}
+
+/*
 impl BasicJSON {
     pub fn new() -> Self {
         Self { book: HashMap::new()}

+ 10 - 14
src/fetch.rs

@@ -1,12 +1,14 @@
-use anyhow::{Context, Result, bail};
+use super::parse;
+use anyhow::{bail, Context, Result};
 use std::{fs::File, io::Write, path::Path};
 use url::Url;
-use super::parse;
 
 /// Convert a relative href to an absolute URL, resolved against `base_url`.
 pub fn relative_to_absolute(base_url: &str, relative_href: &str) -> Result<String> {
     let base_url = Url::parse(base_url).context(format!("Url::parse({})", base_url))?;
-    let new_url = base_url.join(relative_href).context(format!("join({})", relative_href))?;
+    let new_url = base_url
+        .join(relative_href)
+        .context(format!("join({})", relative_href))?;
     Ok(new_url.to_string())
 }
 
@@ -17,8 +19,8 @@ pub struct FetchResult {
 
 pub fn agent_update(user_agent: &str) -> Result<String> {
     let client = reqwest::blocking::Client::builder()
-    .user_agent(user_agent)
-    .build()?;
+        .user_agent(user_agent)
+        .build()?;
 
     let result = fetch(&client, "https://www.mozilla.org/en-US/firefox/releases/")?;
     if let Ok(v) = parse::find_versions(&result) {
@@ -55,8 +57,7 @@ pub fn fetch_cache(
 
     if path.exists() {
         // File already exists -- use cached version.
-        let buffer = std::fs::read_to_string(&path)
-        .with_context(|| {
+        let buffer = std::fs::read_to_string(&path).with_context(|| {
             format!("Failed to read file [{:?}].", &path)
             // path.to_string_lossy().to_string());
         })?;
@@ -72,15 +73,10 @@ pub fn fetch_cache(
     let buffer = res.text()?;
 
     let mut file =
-        File::create(&path)
-        .with_context(|| { 
-            format!("Failed to write file [{:?}].", &path) 
-        })?;
+        File::create(&path).with_context(|| format!("Failed to write file [{:?}].", &path))?;
 
     file.write_all(buffer.as_bytes())
-        .with_context(|| {
-            format!("Failed to write all to [{:?}].", &path)
-        })?;
+        .with_context(|| format!("Failed to write all to [{:?}].", &path))?;
 
     Ok(FetchResult {
         cached: false,

+ 66 - 16
src/main.rs

@@ -1,4 +1,4 @@
-use anyhow::{Result, Context};
+use anyhow::{Context, Result};
 use clap::{Parser, Subcommand};
 use config::save_basic_json;
 use reqwest;
@@ -81,6 +81,7 @@ static BOOKS: LazyLock<Vec<&str>> = LazyLock::new(|| {
     ])
 });
 
+#[allow(unused)]
 static BOOK_NAMES: LazyLock<Vec<&str>> = LazyLock::new(|| {
     Vec::from([
         "Genesis",
@@ -263,11 +264,14 @@ fn main() -> Result<()> {
                 if let Ok(next_url) = next_chapter {
                     // println!("next_url: {}", next_url);
                     let abs_url = fetch::relative_to_absolute(&url, &next_url)?;
-                        // println!("now: {}", abs_url);
-                        url = abs_url;
+                    // println!("now: {}", abs_url);
+                    url = abs_url;
                 } else {
                     // We didn't find the Next Chapter link, so stop.
-                    println!("Did not find Next Chapter link. {}", next_chapter.unwrap_err());
+                    println!(
+                        "Did not find Next Chapter link. {}",
+                        next_chapter.unwrap_err()
+                    );
                     more = false;
                 }
 
@@ -298,11 +302,30 @@ fn main() -> Result<()> {
                 book: HashMap::new(),
             };
 
+            let mut last_book = String::new();
+            
             let mut extractor = |file| {
                 println!("File: {}", file);
                 let bv =
-                    parse::extract_basic_verses(filepath.join(file).to_str().unwrap()).unwrap();
-                // println!("BV: {:?}", bv);
+                    parse::extract_verses(filepath.join(file).to_str().unwrap()).unwrap();
+
+                println!("Book {} Chapter {} BV: {:?}", bv.0, bv.1, bv.2);
+                if bv.0 != last_book {
+                    last_book = bv.0.clone();
+                    json_output.add_to_books(&bv.0);
+                }
+
+                let mut json_book = json_output.new_book(&bv.0);
+                let mut chapter = json_book.new_chapter(bv.1 as usize);
+
+                for (idx, bv_item) in bv.2.verses.into_iter().enumerate() {
+                    let mut verse = chapter.new_verse(idx+1);
+                    verse.heading = bv_item.heading;
+                    verse.start_paragraph = bv_item.start_paragraph;
+                    verse.verse = bv_item.verse;
+                }
+
+                /* 
                 for bv_item in bv {
                     if !json_output.book.contains_key(&bv_item.book) {
                         // book missing
@@ -328,12 +351,15 @@ fn main() -> Result<()> {
                         }
 
                         let mverses = &mut mbook.chapters[chapter as usize - 1].verses;
+                        /* 
                         while verse as usize > mverses.len() {
                             mverses.push(String::new());
                         }
                         mverses[verse as usize - 1] = bv_item.text;
+                        */
                     }
                 }
+                */
             };
 
             if let Some(count) = *count {
@@ -357,11 +383,26 @@ fn main() -> Result<()> {
 
             // println!("Chapters: {:?}", chapters);
 
+            /*
+            <span class="heading"></span>
+            <span class="verse">
+                <span class="label">1</span>
+                <span class="content">text</span>
+                <span class="note">
+                    <span class="label">#</span>
+                    <span class="body">note text</span>
+                </span>
+                <span class="content">text continues</span>
+            </span>
+             */
+
             /*
             // What happened here?
 
             jq .book.John.chapters[2].verses[15] < test.json
             "\n “For \n \n God so loved \n \n the world, \n \n \n that he gave his only Son, that whoever believes in him should not \n \n perish but have eternal life. \n"
+
+            There are multiple notes present in this verse, which I think confuses my simple verse parser code.  This is also red-letter text.
              */
 
             /*
@@ -404,18 +445,27 @@ fn main() -> Result<()> {
 
         Some(Commands::Test {}) => {
             println!("Testing...");
-            let client = reqwest::blocking::Client::builder()
-                .user_agent(&config.user_agent)
-                .build()?;
 
-            // They are using react.  There's a token request, which allows them to fetch the daily reading...
-            let odb = fetch::fetch(&client, "https://www.odbm.org/");
-            // See the .har file for more details.
+            if true {
+                let path = Path::new(&cli.work).join("JHN.3.ESV");
+                println!("Extract Verses: {:?}", path);
+                parse::extract_verses(path.to_str().unwrap())?;
+            }
 
-            if let Ok(html) = odb {
-                println!("{}", html);
-            } else {
-                println!("Fetch error: {:?}", odb.unwrap_err());
+            if false {
+                let client = reqwest::blocking::Client::builder()
+                    .user_agent(&config.user_agent)
+                    .build()?;
+
+                // They are using react.  There's a token request, which allows them to fetch the daily reading...
+                let odb = fetch::fetch(&client, "https://www.odbm.org/");
+                // See the .har file for more details.
+
+                if let Ok(html) = odb {
+                    println!("{}", html);
+                } else {
+                    println!("Fetch error: {:?}", odb.unwrap_err());
+                }
             }
 
             if false {

+ 472 - 2
src/parse.rs

@@ -1,7 +1,9 @@
-use anyhow::{bail, Result};
+use super::config;
+use anyhow::{bail, Context, Result};
 use regex::Regex;
 use scraper;
 use scraper::Element;
+use std::collections::HashSet;
 use std::string::String;
 
 pub fn find_versions(html: &String) -> Result<String> {
@@ -165,8 +167,473 @@ pub struct BasicVerse {
     pub text: String,
 }
 
+fn parse_html_file(filename: &str) -> Result<scraper::Html> {
+    let buffer =
+        std::fs::read_to_string(filename).context(format!("Failed to read: {}", filename))?;
+    Ok(scraper::Html::parse_document(&buffer))
+}
+
+// This shows child elements correctly.
+// If I could build a structure of the chapter, maybe I could parse it?
+// I would at least know what to look for...
+
+fn show_structure(element: scraper::element_ref::ElementRef<'_>, depth: u32) {
+    // For output formatting.
+    let spacer = " ".repeat(depth as usize * 4);
+
+    // This can be multiple classes, so watch out here.
+    let cls = element.attr("class").unwrap();
+    println!(
+        "{} {} E {} {} {:?}",
+        depth,
+        spacer,
+        element.value().name(),
+        cls,
+        element.value()
+    );
+
+    if element.has_children() {
+        // This always seems to think there are child elements?!  (has_children() may also count text nodes -- TODO confirm.)
+        // println!(" >>");
+        for child in element.child_elements() {
+            show_structure(child, depth + 1);
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum VerseInformation {
+    ChapterVerse(String),
+    Content(String),
+    Note(String),
+}
+
+/// Clean element class, and return in a set.
+///
+/// Classes that have `__` in them are returned without the `__` and everything after it.
+fn clean_class(element: scraper::element_ref::ElementRef<'_>) -> HashSet<String> {
+    let mut result = HashSet::<String>::new();
+    if let Some(e_class) = element.attr("class") {
+        for c in e_class.split(" ") {
+            if let Some(chapter) = c.split_once("__") {
+                result.insert(chapter.0.to_string());
+            } else {
+                result.insert(c.to_string());
+            }
+        }
+    }
+    result
+}
+
+// This doesn't work because ft is a child of body.
+fn walk_note(element: scraper::element_ref::ElementRef<'_>, results: &mut Vec<VerseInformation>) {
+    let body_selector = scraper::Selector::parse(r#"span[class*="_body__"], span[class="ft"]"#).unwrap();
+    let mut text = String::new();
+
+    if let Some(body) = element.select(&body_selector).next() {
+    // for body in element.select(&body_selector).next() {
+        if !text.is_empty() {
+            text.push_str(" ");
+        }
+        text.push_str(element_text(body).as_str());
+    }
+    if !text.is_empty() {
+        results.push(VerseInformation::Note(text));
+    }
+}
+
+fn add_append_note(results: &mut Vec<VerseInformation>, note:&str) {
+    if let Some(last) = results.last_mut() {
+        if let VerseInformation::Note(n) = last {
+            // Ok, the last thing is a "Note".  Append to it.
+            n.push_str(" ");
+            n.push_str(note);
+        } else {
+            results.push(VerseInformation::Note(note.to_string()));
+        }
+    }
+} 
+
+fn walker(
+    element: scraper::element_ref::ElementRef<'_>,
+    results: &mut Vec<VerseInformation>,
+    classes: &mut HashSet<String>,
+    depth: u32,
+) {
+    // For output formatting.
+    // let spacer = " ".repeat(depth as usize * 4);
+    let class_hash = clean_class(element);
+
+    if classes.contains("ChapterContent_note") {
+        // println!("note: {}", element.html());
+
+        // Ok, we're in the "note"
+        // Look for body or ft
+        if class_hash.contains("ChapterContent_body") {
+            // This is the body
+            let mut has_children = false;
+            for child in element.child_elements() {
+                has_children = true;
+                if let Some(cl) = child.attr("class") {
+                    // label = "#"
+                    // _fr__ = chapter_verse
+                    if cl.contains("_label__") || cl.contains("_fr__") {
+                        continue;
+                    }
+                    add_append_note(results, &element_text(child));
+                    // results.push(VerseInformation::Note(element_text(child)));
+                }
+            }
+
+            if !has_children {
+                let text = element_text(element);
+                if !text.is_empty() {
+                    // Check previous results?
+                    add_append_note(results, &text);
+                    // results.push(VerseInformation::Note(text));
+                }
+                // No children, we can return.
+                
+            }
+            return;
+        }
+    }
+
+    // let mut ch_verse = String::new();
+    if class_hash.contains("ChapterContent_verse") {
+        if let Some(ch_v) = element.attr("data-usfm") {
+            // Check the last item.
+            if let Some(last) = results.last() {
+                if let VerseInformation::ChapterVerse(_) = last {
+                    // Ok, the last thing is a "ChapterVerse".  Remove it.
+                    results.pop();
+                }
+            }
+            results.push(VerseInformation::ChapterVerse(ch_v.to_string()));
+        }
+    }
+
+    // We'll have to clean out the results, sometimes we get chapter and verse
+    // information (with a blank content).
+
+    if class_hash.contains("ChapterContent_content") {
+        // Ok!  We have content.
+        // Should I check the classes here for:
+        // _p__ Paragraph?
+        // _q1__, _q2__ Quote?
+
+        let text = element_text(element);
+        if !text.is_empty() {
+            results.push(VerseInformation::Content(text));
+        }
+    }
+
+    if element.has_children() {
+        // Add the classes to our class tracker.
+        for ch in class_hash.iter() {
+            classes.insert(ch.clone());
+        }
+
+        for child in element.child_elements() {
+            walker(child, results, classes, depth + 1);
+        }
+
+        // Remove the classes from the class tracker.
+        for ch in class_hash {
+            classes.remove(&ch);
+        }
+    }
+}
+
+fn element_walker(element: scraper::element_ref::ElementRef<'_>) -> Vec<VerseInformation> {
+    let mut result = Vec::<VerseInformation>::new();
+    let mut classes = HashSet::<String>::new();
+    walker(element, &mut result, &mut classes, 0);
+    result
+}
+
+/// Extract just the Chapter's verses.
+///
+/// Returns Book, Chapter, and Verses
+pub fn extract_verses(filename: &str) -> Result<(String, u8, config::BasicVersesJSON)> {
+    let mut result = config::BasicVersesJSON::new();
+    let document = parse_html_file(filename)?;
+    let h1_selector = scraper::Selector::parse("h1").unwrap();
+    let h1 = document.select(&h1_selector).next().unwrap();
+    let mut book = element_text(h1);
+
+    // Remove chapter number from book.
+    while book.pop() != Some(' ') {
+        // Check for a problem.
+        if book.is_empty() {
+            bail!(format!(
+                "Failed to trim the chapter from [{}].",
+                element_text(h1)
+            ));
+        }
+    }
+
+    let mut chapter_number: u8 = 0;
+
+    // Locate the div that contains all of the chapter verses
+    let chapter_selector = scraper::Selector::parse(r#"div[class*="_chapter__"]"#).unwrap();
+    if let Some(chapter) = document.select(&chapter_selector).next() {
+        // Ok, this is the chapter section.
+
+        // This works amazingly well for showing how the html is structured.
+        // show_structure(chapter, 0);
+
+        println!("Elements: {:?}", element_walker(chapter));
+
+        println!("Chapter: {}", chapter.html());
+
+        // Look for _s1__ and _p__
+        // Missing _q1__ and _q2__ (quoted text?)  It is left-indented.
+
+        let section_or_p_selector = scraper::Selector::parse(
+            r#"div[class*="_p__"], div[class*="_s1__"], div[class*="_q1__"], div[class*="_q2__"]"#,
+        )
+        .unwrap();
+        let mut section_header = String::new();
+        let mut start_paragraph = false;
+        let section_heading_selector =
+            scraper::Selector::parse(r#"span[class*="_heading__"]"#).unwrap();
+        let verse_selector = scraper::Selector::parse(r#"span[class*="_verse__"]"#).unwrap();
+
+        for section in chapter.select(&section_or_p_selector) {
+            if let Some(cls) = section.attr("class") {
+                if cls.contains("_s1__") {
+                    // Get section header
+                    section_header.clear();
+                    for sh in section.select(&section_heading_selector) {
+                        if !section_header.is_empty() {
+                            section_header.push_str(" ");
+                        }
+                        section_header.push_str(&element_text(sh));
+                    }
+                    println!("Heading: {}", section_header);
+                    /*
+                    if let Some(section_heading) = section.select(&section_heading_selector).next()
+                    {
+                        section_header = element_text(section_heading);
+                        println!("Heading: {}", section_header);
+                    }
+                    */
+                } else if cls.contains("_p__") {
+                    start_paragraph = true;
+                    println!("¶ paragraph");
+
+                    // Process verses here...
+
+                    // We do get verses with blank content (from the previous paragraph).
+                    // We need to handle that.
+
+                    for verse in section.select(&verse_selector) {
+                        if let Some(ch_ver) = verse.attr("data-usfm") {
+                            println!(">> {}", ch_ver);
+
+                            let mut output_chapter = false;
+
+                            let parts = ch_ver.split(".").skip(1).collect::<Vec<_>>();
+                            chapter_number = 0;
+                            if parts.len() == 2 {
+                                chapter_number = parts[0].parse()?;
+                            }
+                            let verse_number: u8 = parts.last().unwrap_or(&"0").parse()?;
+
+                            // println!("Book {} : {}", book, ch_ver);
+
+                            // Find verses content
+                            let verse_content_selector =
+                                scraper::Selector::parse(r#"span[class*="Content_content__"]"#)
+                                    .unwrap();
+                            for content in verse.select(&verse_content_selector) {
+                                // Check for empty content -- and skip it.
+                                let content_text = element_text(content);
+                                if content_text.is_empty() {
+                                    continue;
+                                }
+
+                                let verse = result.new_verse(verse_number as usize);
+
+                                if start_paragraph {
+                                    verse.start_paragraph = true;
+                                    start_paragraph = false;
+                                }
+                                if !section_header.is_empty() {
+                                    verse.heading = Some(section_header);
+                                    section_header = String::new();
+                                }
+
+                                if !verse.verse.is_empty() {
+                                    verse.verse.push_str(" ");
+                                }
+                                verse.verse.push_str(&content_text);
+
+                                if !output_chapter {
+                                    output_chapter = true;
+                                    println!("Book {} : {}", book, ch_ver);
+                                }
+
+                                println!(">> {}", content.html());
+                                println!("  [{}]", content_text);
+                            }
+                            println!("-- next verse --");
+                        }
+                    }
+
+                    println!("end ¶ paragraph");
+                } else {
+                    // _q1__, _q2__
+                    // Each part of the verse is wrapped in the _q1__ or _q2__'s.
+
+                    /*
+                    BUG:
+                    Galatians 2.13-14.
+                    {
+                    "start_paragraph": false,
+                    "heading": null,
+                    "verse": ""
+                    },
+                    {
+                    "start_paragraph": false,
+                    "heading": null,
+                    "verse": ""
+                    },
+                    {
+                    "start_paragraph": true,
+                    "heading": "Justified by Faith",
+                    "verse": "But if, in our endeavor to be justified in Christ, we too were found to be sinners, is Christ then a servant of sin? Certainly not!"
+                    },
+
+                    2.13 and 2.14 aren't wrapped in anything. (no _p__, _q1__, or _q2__)
+
+                    Amos 5.1
+                    Hear this word that I take up over you in lamentation, O house of Israel:
+                    {
+                    "start_paragraph": true,
+                    "heading": "Seek the",
+                    "verse": "Hear this word that I take up over you in lamentation, O house of Israel:"
+                    },
+
+                    // Heading in multiple spans.
+                    <div data-usfm="AMO.5" class="ChapterContent_chapter__uvbXo">
+                        <div class="ChapterContent_label__R2PLt">5</div>
+                        <div class="ChapterContent_s1__bNNaW">
+                            <span class="ChapterContent_heading__xBDcs">Seek the </span>
+                            <span class="ChapterContent_nd__ECPAf">
+                                <span class="ChapterContent_heading__xBDcs">Lord</span>
+                            </span>
+                            <span class="ChapterContent_heading__xBDcs"> and Live</span>
+                        </div>
+                        <div class="ChapterContent_p__dVKHb">
+                            <span class="ChapterContent_content__RrUqA">  </span>
+
+                    Amos 5.4
+                    {
+                    "start_paragraph": false,
+                    "heading": null,
+                    "verse": "“Seek me and live;"
+                    },
+
+                    Missing non-quoted part.
+
+                     */
+
+                    // So, sections of a verse could be "Quoted"...
+                    // Also, sections could be marked red letters.
+
+                    // Pulling a verse:
+                    // jq .book.Galatians.chapters[c].verses[v].verse
+
+                    // I get duplicate outputs of Book and next verse for the same verse.
+                    /*
+                    >> GEN.1.27
+                    Book Genesis : GEN.1.27
+                    >> <span class="ChapterContent_content__RrUqA">So God created man in his own image,</span>
+                      [So God created man in his own image,]
+                    -- next verse --
+                    >> GEN.1.27
+                    Book Genesis : GEN.1.27
+                    >> <span class="ChapterContent_content__RrUqA">in the image of God he created him;</span>
+                      [in the image of God he created him;]
+                    -- next verse --
+                    >> GEN.1.27
+                    Book Genesis : GEN.1.27
+                    >> <span class="ChapterContent_content__RrUqA">male and female he created them.</span>
+                      [male and female he created them.]
+                    -- next verse --
+                    ¶ paragraph
+                    >> GEN.1.27
+                    -- next verse --
+                                         */
+                    for verse in section.select(&verse_selector) {
+                        if let Some(ch_ver) = verse.attr("data-usfm") {
+                            println!(">> {}", ch_ver);
+
+                            let mut output_chapter = false;
+
+                            let parts = ch_ver.split(".").skip(1).collect::<Vec<_>>();
+                            chapter_number = 0;
+                            if parts.len() == 2 {
+                                chapter_number = parts[0].parse()?;
+                            }
+                            let verse_number: u8 = parts.last().unwrap_or(&"0").parse()?;
+
+                            // println!("Book {} : {}", book, ch_ver);
+
+                            // Find verses content
+                            let verse_content_selector =
+                                scraper::Selector::parse(r#"span[class*="Content_content__"]"#)
+                                    .unwrap();
+                            for content in verse.select(&verse_content_selector) {
+                                // Check for empty content -- and skip it.
+                                let content_text = element_text(content);
+                                if content_text.is_empty() {
+                                    continue;
+                                }
+
+                                let verse = result.new_verse(verse_number as usize);
+
+                                if start_paragraph {
+                                    verse.start_paragraph = true;
+                                    start_paragraph = false;
+                                }
+                                if !section_header.is_empty() {
+                                    verse.heading = Some(section_header);
+                                    section_header = String::new();
+                                }
+
+                                if !verse.verse.is_empty() {
+                                    verse.verse.push_str(" ");
+                                }
+                                verse.verse.push_str(&content_text);
+
+                                if !output_chapter {
+                                    output_chapter = true;
+                                    println!("Book {} : {}", book, ch_ver);
+                                }
+
+                                println!(">> {}", content.html());
+                                println!("  [{}]", content_text);
+                            }
+                            println!("-- next verse --");
+                        }
+                    }
+                }
+            }
+        }
+    } else {
+        bail!("Unable to locate the div tag with _chapter__.");
+    }
+
+    Ok((book, chapter_number, result))
+}
+
+#[deprecated(note = "This is the old version, use extract_verses")]
 pub fn extract_basic_verses(filename: &str) -> Result<Vec<BasicVerse>> {
     let mut result = Vec::<BasicVerse>::new();
+
     let buffer = std::fs::read_to_string(filename)?;
     let document = scraper::Html::parse_document(&buffer);
     let h1_selector = scraper::Selector::parse("h1").unwrap();
@@ -200,7 +667,10 @@ pub fn extract_basic_verses(filename: &str) -> Result<Vec<BasicVerse>> {
                     while book.pop() != Some(' ') {
                         // Check for a problem.
                         if book.is_empty() {
-                            bail!(format!("Failed to trim the chapter from [{}].", element_text(h1)));
+                            bail!(format!(
+                                "Failed to trim the chapter from [{}].",
+                                element_text(h1)
+                            ));
                         }
                     }
                 }