2 Commits 9fbbf47380 ... 6f31a912ee

Author SHA1 Message Date
  Steve Thielemann 6f31a912ee Working WalkerParser. 10 months ago
  Steve Thielemann 9371e904d8 Working walker. Change to struct to clean it up. 10 months ago
3 changed files with 375 additions and 174 deletions
  1. 18 40
      src/config.rs
  2. 12 10
      src/main.rs
  3. 345 124
      src/parse.rs

+ 18 - 40
src/config.rs

@@ -32,6 +32,7 @@ pub struct BasicJSON {
 }
 
 impl BasicJSON {
+    #[allow(dead_code)]    
     pub fn new() -> Self {
         Self {
             books: Vec::new(),
@@ -41,7 +42,7 @@ impl BasicJSON {
 
     /// Add new book, return mutable instance of it.
     pub fn new_book(&mut self, name: &String) -> &mut BasicChaptersJSON {
-        if ! self.book.contains_key(name) {
+        if !self.book.contains_key(name) {
             self.book.insert(name.clone(), BasicChaptersJSON::new());
         }
         self.book.get_mut(name).unwrap()
@@ -60,7 +61,7 @@ pub struct BasicChaptersJSON {
 impl BasicChaptersJSON {
     pub fn new() -> Self {
         Self {
-            chapters: Vec::new()
+            chapters: Vec::new(),
         }
     }
 
@@ -68,62 +69,39 @@ impl BasicChaptersJSON {
         while self.chapters.len() < index {
             self.chapters.push(BasicVersesJSON::new());
         }
-        self.chapters.get_mut(index-1).unwrap()
+        self.chapters.get_mut(index - 1).unwrap()
     }
 }
 
 #[derive(Serialize, Deserialize, Debug)]
 pub struct BasicVersesJSON {
-    pub verses: Vec<BasicVerseJSON>,
+    pub verses: Vec<Vec<BasicVerseJSON>>,
 }
 
 impl BasicVersesJSON {
     pub fn new() -> Self {
-        Self {
-            verses: Vec::new()
-        }
+        Self { verses: Vec::new() }
     }
 
-    pub fn new_verse(&mut self, index: usize) -> &mut BasicVerseJSON {
+    /// Return the item list for verse `index`, where `index` is 1-based.
+    ///
+    /// `verses` is grown with empty lists until it holds at least `index`
+    /// entries, so the requested slot always exists.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index` is 0 (there is no slot `index - 1`).
+    pub fn verse(&mut self, index: usize) -> &mut Vec<BasicVerseJSON> {
         while self.verses.len() < index {
-            self.verses.push(BasicVerseJSON::new());
+            self.verses.push(Vec::new());
         }
-        self.verses.get_mut(index-1).unwrap()
+        self.verses.get_mut(index - 1).unwrap()
     }
 }
 
 #[derive(Serialize, Deserialize, Debug)]
-pub struct BasicVerseJSON {
-    pub start_paragraph: bool,
-    pub heading: Option<String>,
-    pub verse: String,
-}
-
-impl BasicVerseJSON {
-    pub fn new() -> Self {
-        Self {
-            start_paragraph: false,
-            heading: None,
-            verse: String::new(),
-        }
-    }
-}
-
-/*
-impl BasicJSON {
-    pub fn new() -> Self {
-        Self { book: HashMap::new()}
-    }
-}
-*/
-
-/*
-#[derive(Serialize, Deserialize)]
-pub struct BasicVerseJSON {
-    pub verse: u8,
-    pub text: String,
+/// One item stored in a verse's list.
+///
+/// A verse is a `Vec` of these, so a single verse can carry a heading,
+/// notes, and several pieces of text with different formatting.
+pub enum BasicVerseJSON {
+    /// Heading text.
+    Heading(String),
+    /// Note text.
+    Note(String),
+    /// A piece of verse text and its formatting flags.
+    Verse {
+        /// The text itself.
+        text: String,
+        /// Paragraph flag (filled from the parser's `paragraph` value).
+        paragraph: bool,
+        /// Quote flag (filled from the parser's `quoted` value).
+        quote: bool,
+        /// Red-letter flag (filled from the parser's `red` value).
+        red: bool,
+    },
 }
-*/
 
 pub fn save_basic_json(filename: &str, json: &BasicJSON) -> Result<()> {
     let data = serde_json::to_string_pretty(json)?;

+ 12 - 10
src/main.rs

@@ -304,10 +304,10 @@ fn main() -> Result<()> {
 
             let mut last_book = String::new();
             
-            let mut extractor = |file| {
+            let mut extractor = |file| -> Result<()> {
                 println!("File: {}", file);
                 let bv =
-                    parse::extract_verses(filepath.join(file).to_str().unwrap()).unwrap();
+                    parse::extract_verses(filepath.join(file).to_str().unwrap())?;
 
                 println!("Book {} Chapter {} BV: {:?}", bv.0, bv.1, bv.2);
                 if bv.0 != last_book {
@@ -315,14 +315,15 @@ fn main() -> Result<()> {
                     json_output.add_to_books(&bv.0);
                 }
 
-                let mut json_book = json_output.new_book(&bv.0);
-                let mut chapter = json_book.new_chapter(bv.1 as usize);
+                let json_book = json_output.new_book(&bv.0);
+                let chapter = json_book.new_chapter(bv.1 as usize);
 
                 for (idx, bv_item) in bv.2.verses.into_iter().enumerate() {
-                    let mut verse = chapter.new_verse(idx+1);
-                    verse.heading = bv_item.heading;
-                    verse.start_paragraph = bv_item.start_paragraph;
-                    verse.verse = bv_item.verse;
+                    let verse = chapter.verse(idx+1);
+                    for bvi in bv_item {
+                        verse.push(bvi);
+                    }
+                    // verse = bv_item.clone();
                 }
 
                 /* 
@@ -360,18 +361,19 @@ fn main() -> Result<()> {
                     }
                 }
                 */
+                Ok(())
             };
 
             if let Some(count) = *count {
                 // Ok, they gave us a value.  Use it.
                 println!("Extract {}:", count);
                 for file in files.iter().take(count as usize) {
-                    extractor(file);
+                    extractor(file)?;
                 }
             } else {
                 println!("Extract All:");
                 for file in files.iter() {
-                    extractor(file);
+                    extractor(file)?;
                 }
             }
 

+ 345 - 124
src/parse.rs

@@ -50,10 +50,12 @@ fn element_text(element: scraper::ElementRef<'_>) -> String {
     text
 }
 
+
 /// Extract element verse text
 ///
 /// This trims the elements, (translating "  " to "\n").
 /// Joins with a single space.
+#[allow(dead_code)]
 fn verse_element_text(element: scraper::ElementRef<'_>) -> String {
     let span_class = scraper::Selector::parse("span[class]").unwrap();
     let text: String = element
@@ -158,6 +160,7 @@ pub fn find_vod(html: &String) -> Result<Vec<VerseOfDay>> {
     Ok(result)
 }
 
+/* 
 #[derive(Debug)]
 pub struct BasicVerse {
     pub book: String,
@@ -166,6 +169,7 @@ pub struct BasicVerse {
     // pub chapter_verse: String,
     pub text: String,
 }
+*/
 
 fn parse_html_file(filename: &str) -> Result<scraper::Html> {
     let buffer =
@@ -177,18 +181,22 @@ fn parse_html_file(filename: &str) -> Result<scraper::Html> {
 // If I could build a structure of the chapter, maybe I could parse it?
 // I would at least know what to look for...
 
-fn show_structure(element: scraper::element_ref::ElementRef<'_>, depth: u32) {
+/// Display the structure of the HTML
+///
+/// This shows a properly indented layout of the HTML tags.
+/// It shows what is nested in what, and what attributes the element
+/// has.  (And it doesn't delete empty tags like html tidy does.)
+pub fn show_structure(element: scraper::element_ref::ElementRef<'_>, depth: u32) {
     // For output formatting.
     let spacer = " ".repeat(depth as usize * 4);
 
     // This can be multiple classes, so watch out here.
-    let cls = element.attr("class").unwrap();
+    // let cls = element.attr("class").unwrap();
     println!(
-        "{} {} E {} {} {:?}",
+        "{} {} E {} {:?}",
         depth,
         spacer,
         element.value().name(),
-        cls,
         element.value()
     );
 
@@ -201,13 +209,23 @@ fn show_structure(element: scraper::element_ref::ElementRef<'_>, depth: u32) {
     }
 }
 
+/// Verse information
+///
+/// The flat list of items the HTML walker produces for a chapter.
 #[derive(Debug)]
 pub enum VerseInformation {
+    /// Text of an element with the `ChapterContent_heading` class.
+    Heading(String),
+    /// The element's `data-usfm` value, e.g. "GEN.1.2".
+    ///
+    /// `extract_verses` splits this on `.` and reads the chapter and verse
+    /// numbers from the second and third parts.
     ChapterVerse(String),
-    Content(String),
+    /// Verse text and its formatting flags.
+    Content {
+        /// The text itself.
+        text: String,
+        /// True when an enclosing element has class `ChapterContent_q1` or `ChapterContent_q2`.
+        quoted: bool,
+        /// True when a `ChapterContent_p` element was seen since the last stored content.
+        paragraph: bool,
+        /// True when an enclosing element has class `ChapterContent_wj` or
+        /// `ChapterContent_wordsofchrist`.
+        red: bool,
+    },
+    /// Verse note
     Note(String),
 }
 
+/*
 /// Clean element class, and return in a set.
 ///
 /// Classes that have __ in them are returned without the __ and ...
@@ -227,11 +245,12 @@ fn clean_class(element: scraper::element_ref::ElementRef<'_>) -> HashSet<String>
 
 // This doesn't work because ft is a child of body.
 fn walk_note(element: scraper::element_ref::ElementRef<'_>, results: &mut Vec<VerseInformation>) {
-    let body_selector = scraper::Selector::parse(r#"span[class*="_body__"], span[class="ft"]"#).unwrap();
+    let body_selector =
+        scraper::Selector::parse(r#"span[class*="_body__"], span[class="ft"]"#).unwrap();
     let mut text = String::new();
 
     if let Some(body) = element.select(&body_selector).next() {
-    // for body in element.select(&body_selector).next() {
+        // for body in element.select(&body_selector).next() {
         if !text.is_empty() {
             text.push_str(" ");
         }
@@ -242,7 +261,7 @@ fn walk_note(element: scraper::element_ref::ElementRef<'_>, results: &mut Vec<Ve
     }
 }
 
-fn add_append_note(results: &mut Vec<VerseInformation>, note:&str) {
+fn add_append_note(results: &mut Vec<VerseInformation>, note: &str) {
     if let Some(last) = results.last_mut() {
         if let VerseInformation::Note(n) = last {
             // Ok, the last thing is a "ChapterVerse".
@@ -252,12 +271,13 @@ fn add_append_note(results: &mut Vec<VerseInformation>, note:&str) {
             results.push(VerseInformation::Note(note.to_string()));
         }
     }
-} 
+}
 
 fn walker(
     element: scraper::element_ref::ElementRef<'_>,
     results: &mut Vec<VerseInformation>,
     classes: &mut HashSet<String>,
+    track: &mut HashMap<String, String>,
     depth: u32,
 ) {
     // For output formatting.
@@ -293,7 +313,6 @@ fn walker(
                     // results.push(VerseInformation::Note(text));
                 }
                 // No children, we can return.
-                
             }
             return;
         }
@@ -302,14 +321,32 @@ fn walker(
     // let mut ch_verse = String::new();
     if class_hash.contains("ChapterContent_verse") {
         if let Some(ch_v) = element.attr("data-usfm") {
+            // I'm getting duplicate ChapterVerse items in the results now.
             // Check the last item.
+            let mut new_chv = false;
+
+            if track.contains_key("ch_v") {
+                if let Some(tchv) = track.get("ch_v") {
+                    if tchv != ch_v {
+                        new_chv = true;
+                        track.insert("ch_v".to_string(), ch_v.to_string());
+                    }
+                }
+            } else {
+                new_chv = true;
+                track.insert("ch_v".to_string(), ch_v.to_string());
+            }
+
             if let Some(last) = results.last() {
                 if let VerseInformation::ChapterVerse(_) = last {
                     // Ok, the last thing is a "ChapterVerse".  Remove it.
                     results.pop();
                 }
             }
-            results.push(VerseInformation::ChapterVerse(ch_v.to_string()));
+
+            if new_chv {
+                results.push(VerseInformation::ChapterVerse(ch_v.to_string()));
+            }
         }
     }
 
@@ -321,13 +358,41 @@ fn walker(
         // Should I check the classes here for:
         // _p__ Paragraph?
         // _q1__, _q2__ Quote?
+        let quoted = classes.contains("ChapterContent_q1") || classes.contains("ChapterContent_q2");
+        let red = classes.contains("ChapterContent_wj")
+            || classes.contains("ChapterContent_wordsofchrist");
+        let text = element_text(element);
+        if !text.is_empty() {
+
+            // We have something to save.  Is this start of paragraph?
+            let p = track.contains_key("p");
+            if p {
+                // Ok, we're storing it.  Reset the paragraph flag.
+                track.remove("p");
+            }
+
+            results.push(VerseInformation::Content {
+                text,
+                quoted,
+                paragraph: p,
+                red,
+            });
+        }
+    }
 
+    if class_hash.contains("ChapterContent_heading") {
         let text = element_text(element);
         if !text.is_empty() {
-            results.push(VerseInformation::Content(text));
+            results.push(VerseInformation::Heading(text));
         }
     }
 
+    if class_hash.contains("ChapterContent_p") {
+        track.insert("p".to_string(), "".to_string());
+    }
+
+    // Unfortunately, has_children always returns true...
+
     if element.has_children() {
         // Add the classes to our class tracker.
         for ch in class_hash.iter() {
@@ -335,7 +400,7 @@ fn walker(
         }
 
         for child in element.child_elements() {
-            walker(child, results, classes, depth + 1);
+            walker(child, results, classes, track, depth + 1);
         }
 
         // Remove the classes from the class tracker.
@@ -345,12 +410,216 @@ fn walker(
     }
 }
 
+// TO FIX:  Write this as a structure with impl method calls.
+// Eliminate the passing of state via the function calls.
+
 fn element_walker(element: scraper::element_ref::ElementRef<'_>) -> Vec<VerseInformation> {
     let mut result = Vec::<VerseInformation>::new();
     let mut classes = HashSet::<String>::new();
-    walker(element, &mut result, &mut classes, 0);
+    let mut track = HashMap::<String, String>::new();
+    walker(element, &mut result, &mut classes, &mut track, 0);
     result
 }
+*/
+
+/// Stateful HTML walker that flattens an element tree into `VerseInformation` items.
+///
+/// The state that used to be threaded through function arguments lives in
+/// the fields below.
+pub struct WalkerParser {
+    /// Items collected so far, in the order they were encountered.
+    results: Vec<VerseInformation>,
+    /// Cleaned class names of the elements enclosing the one being visited.
+    classes: HashSet<String>,
+    /// Set when a `ChapterContent_p` element is seen; cleared once content is stored.
+    paragraph: bool,
+    /// The last `data-usfm` value recorded, used to avoid pushing repeats.
+    chapter_verse: String,
+}
+
+impl WalkerParser {
+    /// Create a parser with no collected results and all tracking state unset.
+    pub fn new() -> Self {
+        Self {
+            results: Vec::new(),
+            classes: HashSet::new(),
+            paragraph: false,
+            chapter_verse: String::new(),
+        }
+    }
+
+    /// Return the parser to its freshly-constructed state.
+    ///
+    /// The collections are emptied in place and the flags are unset.
+    pub fn clear(&mut self) {
+        self.paragraph = false;
+        self.chapter_verse.clear();
+        self.classes.clear();
+        self.results.clear();
+    }
+
+    /// Collect the element's text.
+    ///
+    /// Each text fragment is trimmed of surrounding whitespace, fragments that
+    /// end up empty are dropped, and the rest are concatenated with no separator.
+    fn element_text(element: scraper::ElementRef<'_>) -> String {
+        let mut text = String::new();
+        for piece in element.text() {
+            let piece = piece.trim();
+            if !piece.is_empty() {
+                text.push_str(piece);
+            }
+        }
+        text
+    }
+
+    /// Clean element class, and return in a set.
+    ///
+    /// The `class` attribute is split on whitespace.  A class containing
+    /// `__` is cut at the first `__` and only the part before it is kept
+    /// (`ChapterContent_verse__57FIw` becomes `ChapterContent_verse`); any
+    /// other class is kept unchanged.  An element without a `class`
+    /// attribute yields an empty set.
+    fn clean_class(element: scraper::element_ref::ElementRef<'_>) -> HashSet<String> {
+        element
+            .attr("class")
+            .into_iter()
+            // split_whitespace (rather than split(" ")) never yields empty
+            // tokens and also handles tabs and newlines between classes.
+            .flat_map(|classes| classes.split_whitespace())
+            .map(|c| c.split_once("__").map_or(c, |(prefix, _)| prefix).to_string())
+            .collect()
+    }
+
+    /// Record note text.
+    ///
+    /// When the most recent result is already a `VerseInformation::Note`, the
+    /// new text is appended to it after a single space.  Otherwise a new
+    /// note item is pushed.
+    fn add_note(&mut self, note: &str) {
+        if let Some(VerseInformation::Note(existing)) = self.results.last_mut() {
+            existing.push(' ');
+            existing.push_str(note);
+            return;
+        }
+        self.results.push(VerseInformation::Note(note.to_string()));
+    }
+
+    /// Record verse content, merging it into the previous item when possible.
+    ///
+    /// The new text is appended (after a single space) to the last result
+    /// when that result is also `Content`, has the same `quoted` and `red`
+    /// flags, and the new content does not start a paragraph.  In every
+    /// other case `c` is pushed as a new item.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `c` is not `VerseInformation::Content`.
+    fn add_content(&mut self, c: VerseInformation) {
+        let (c_text, c_q, c_p, c_r) = match &c {
+            VerseInformation::Content {
+                text,
+                quoted,
+                paragraph,
+                red,
+            } => (text, *quoted, *paragraph, *red),
+            _ => panic!("Expected VerseInformation::Content not {:?}", c),
+        };
+
+        // A paragraph start always begins a new item, so only look at the
+        // previous item when this one continues the current paragraph.
+        if !c_p {
+            if let Some(VerseInformation::Content {
+                text: l_text,
+                quoted: l_q,
+                red: l_r,
+                ..
+            }) = self.results.last_mut()
+            {
+                if *l_q == c_q && *l_r == c_r {
+                    l_text.push(' ');
+                    l_text.push_str(c_text);
+                    return;
+                }
+            }
+        }
+        self.results.push(c);
+    }
+
+    /// Recursively called to handle child elements.
+    ///
+    /// self.classes contains the classes of the enclosing elements.
+    /// class_hash contains the current element's classes.
+    ///
+    /// Depending on the element's classes this records a note, a
+    /// chapter/verse marker, verse content, or a heading, and then visits
+    /// the element's children.  A note body is handled completely here and
+    /// its children are not visited again.
+    fn recursive_walker(&mut self, element: scraper::element_ref::ElementRef<'_>) {
+        let class_hash = Self::clean_class(element);
+
+        if self.classes.contains("ChapterContent_note")
+            && class_hash.contains("ChapterContent_body")
+        {
+            // We're in the body of a note.
+            let mut has_children = false;
+            for child in element.child_elements() {
+                has_children = true;
+                // Skip the note's label and reference children.
+                if let Some(cl) = child.attr("class") {
+                    if cl.contains("_label__") || cl.contains("_fr__") {
+                        continue;
+                    }
+                }
+                let text = Self::element_text(child);
+                if !text.is_empty() {
+                    self.add_note(&text);
+                }
+            }
+
+            if !has_children {
+                let text = Self::element_text(element);
+                if !text.is_empty() {
+                    self.add_note(&text);
+                }
+            }
+            // Since we've handled children elements here, we're done here.
+            return;
+        }
+
+        if class_hash.contains("ChapterContent_verse") {
+            if let Some(ch_v) = element.attr("data-usfm") {
+                // The same data-usfm value can show up on several elements;
+                // only record it when it changes.
+                if self.chapter_verse != ch_v {
+                    self.chapter_verse = ch_v.to_string();
+                    self.results
+                        .push(VerseInformation::ChapterVerse(ch_v.to_string()));
+                }
+            }
+        }
+
+        if class_hash.contains("ChapterContent_content") {
+            // Content.  The formatting flags come from the enclosing elements.
+            let quoted = self.classes.contains("ChapterContent_q1")
+                || self.classes.contains("ChapterContent_q2");
+            let red = self.classes.contains("ChapterContent_wj")
+                || self.classes.contains("ChapterContent_wordsofchrist");
+            let text = Self::element_text(element);
+            if !text.is_empty() {
+                // The paragraph flag applies to the first stored content only.
+                let paragraph = self.paragraph;
+                self.paragraph = false;
+
+                self.add_content(VerseInformation::Content {
+                    text,
+                    quoted,
+                    paragraph,
+                    red,
+                });
+            }
+        }
+
+        if class_hash.contains("ChapterContent_heading") {
+            let text = Self::element_text(element);
+            if !text.is_empty() {
+                self.results.push(VerseInformation::Heading(text));
+            }
+        }
+
+        if class_hash.contains("ChapterContent_p") {
+            self.paragraph = true;
+        }
+
+        // Unfortunately, has_children always returns true?
+
+        if element.has_children() {
+            // Add element classes to class tracker, remembering which ones
+            // were not already present.  Only those are removed afterwards,
+            // so a class that an enclosing element also carries stays
+            // tracked for the rest of that element's subtree.
+            let mut added = Vec::new();
+            for element_class in &class_hash {
+                if self.classes.insert(element_class.clone()) {
+                    added.push(element_class);
+                }
+            }
+
+            for child in element.child_elements() {
+                self.recursive_walker(child);
+            }
+
+            for element_class in added {
+                self.classes.remove(element_class);
+            }
+        }
+    }
+
+    /// Walk `element` and its descendants and return the collected items.
+    ///
+    /// The parser is cleared first, so results from an earlier call are discarded.
+    pub fn parse(&mut self, element: scraper::element_ref::ElementRef<'_>) -> &[VerseInformation] {
+        self.clear();
+        self.recursive_walker(element);
+        &self.results
+    }
+}
 
 /// Extract just the Chapter's verses.
 ///
@@ -374,6 +643,8 @@ pub fn extract_verses(filename: &str) -> Result<(String, u8, config::BasicVerses
     }
 
     let mut chapter_number: u8 = 0;
+    let mut verse_number: u8 = 0;
+    let mut walker = WalkerParser::new();
 
     // Locate the div that contains all of the chapter verses
     let chapter_selector = scraper::Selector::parse(r#"div[class*="_chapter__"]"#).unwrap();
@@ -381,10 +652,67 @@ pub fn extract_verses(filename: &str) -> Result<(String, u8, config::BasicVerses
         // Ok, this is the chapter section.
 
         // This works amazingly well for showing how the html is structured.
-        // show_structure(chapter, 0);
+        show_structure(chapter, 0);
+        let results = walker.parse(chapter);
+
+        println!("Elements: {:?}", results);
 
-        println!("Elements: {:?}", element_walker(chapter));
+        let mut heading = String::new();
 
+        for r in results {
+            match r {
+                VerseInformation::Heading(h) => {
+                    heading = h.clone();
+                }
+                VerseInformation::ChapterVerse(cv) => {
+                    let parts = cv.split(".").collect::<Vec<_>>();
+                    chapter_number = parts[1].parse().unwrap();
+                    verse_number = parts[2].parse().unwrap();
+
+                    if !heading.is_empty() {
+                        let v = result.verse(verse_number as usize);
+                        v.push(config::BasicVerseJSON::Heading(heading.clone()));
+                        heading.clear();
+                    }
+                    /*
+                    if result.verses.len() < verse_number as usize {
+                        bail!(
+                            "Len = {}, wanting {}",
+                            result.verses.len() + 1,
+                            verse_number
+                        );
+                    }
+                    */
+                }
+                VerseInformation::Note(n) => {
+                    if verse_number == 0 {
+                        println!("DERP! verse_number is zero! Note: {}", n);
+                    } else {
+                    let v = result.verse(verse_number as usize);
+                    v.push(config::BasicVerseJSON::Note(n.clone()));
+                    }
+                }
+                VerseInformation::Content {
+                    text,
+                    quoted,
+                    paragraph,
+                    red,
+                } => {
+                    if verse_number == 0 {
+                        println!("DERP! verse_number is zero! Content: {}!", text);
+                    } else {
+                        let v = result.verse(verse_number as usize);
+                        v.push(config::BasicVerseJSON::Verse {
+                            text: text.to_string(),
+                            paragraph: *paragraph,
+                            quote: *quoted,
+                            red: *red,
+                        });
+                    }
+                }
+            }
+        }
+        /*
         println!("Chapter: {}", chapter.html());
 
         // Look for _s1__ and _p__
@@ -623,6 +951,7 @@ pub fn extract_verses(filename: &str) -> Result<(String, u8, config::BasicVerses
                 }
             }
         }
+        */
     } else {
         bail!("Unable to locate the div tag with _chapter__.");
     }
@@ -630,114 +959,6 @@ pub fn extract_verses(filename: &str) -> Result<(String, u8, config::BasicVerses
     Ok((book, chapter_number, result))
 }
 
-#[deprecated(note = "This is the old version, use extract_verses")]
-pub fn extract_basic_verses(filename: &str) -> Result<Vec<BasicVerse>> {
-    let mut result = Vec::<BasicVerse>::new();
-
-    let buffer = std::fs::read_to_string(filename)?;
-    let document = scraper::Html::parse_document(&buffer);
-    let h1_selector = scraper::Selector::parse("h1").unwrap();
-    let h1 = document.select(&h1_selector).next().unwrap();
-    let mut book = element_text(h1);
-    // println!("Heading: {}", element_text(h1));
-    let mut book_trim = true;
-
-    let span_data_usfm_selector = scraper::Selector::parse("span[data-usfm]").unwrap();
-    let _span_class_selector = scraper::Selector::parse("span[class]").unwrap();
-    let _span_class_content_selector =
-        scraper::Selector::parse(r#"span[class="ChapterContent_content__RrUqA"]"#).unwrap();
-
-    for span in document.select(&span_data_usfm_selector) {
-        // This will always be successful.
-        if let Some(data) = span.attr("data-usfm") {
-            // "GEN.1.2"
-            // let ch_ver = data.split(".").skip(1).collect::<String>();
-
-            let parts = data.split(".").skip(1).collect::<Vec<_>>();
-            let mut chapter_number: u8 = 0;
-            if parts.len() == 2 {
-                chapter_number = parts[0].parse()?;
-            }
-
-            if book_trim {
-                // Only trim the book once.
-                book_trim = false;
-                if chapter_number != 0 {
-                    // Remove chapter number from book.
-                    while book.pop() != Some(' ') {
-                        // Check for a problem.
-                        if book.is_empty() {
-                            bail!(format!(
-                                "Failed to trim the chapter from [{}].",
-                                element_text(h1)
-                            ));
-                        }
-                    }
-                }
-            }
-
-            let verse_number: u8 = parts.last().unwrap_or(&"0").parse()?;
-
-            // GEN, 1, 2
-            // But, there's some books that don't have chapters.  Beware!
-            let text_try = verse_element_text(span);
-            // This looks good.  ;)
-            // println!("{} text: {:?}", data, text_try);
-
-            if let Some(b) = result.get_mut(verse_number as usize - 1) {
-                // Yes, it already exists...
-                // It seems like these should be joined with "\n" instead of " ".
-
-                if !(*b).text.ends_with("\n") && !text_try.starts_with("\n") {
-                    (*b).text.push_str("\n");
-                }
-                b.text.push_str(text_try.as_str());
-            } else {
-                let bv = BasicVerse {
-                    book: book.clone(),
-                    chapter: chapter_number,
-                    verse: verse_number,
-                    text: text_try,
-                };
-
-                result.push(bv);
-            }
-
-            /*
-            // There can be multiples of these with matching values.
-            let lines: String = span
-                .select(&span_class_selector)
-                .filter(|x| {
-                    if let Some(c) = x.attr("class") {
-                        if c.contains("content") {
-                            return true;
-                        }
-                    }
-                    false
-                })
-                .map(|x| {
-                    println!("x = {:?}", element_text(x));
-                    let init = String::new();
-                    let j = x.text().fold(init, |acc, x| {
-                        let mut s = acc;
-                        if x == "  " {
-                            s.push_str("\n");
-                        } else {
-                            s.push_str(x);
-                        }
-                        s
-                    });
-                    j
-                })
-                .collect();
-
-            println!("data {} lines {:?}", data, lines);
-            */
-        }
-    }
-    Ok(result)
-}
-
 pub fn find_next_chapter(html: &String) -> Result<String> {
     let document = scraper::Html::parse_document(html);
     // let a_selector = scraper::Selector::parse("div>a").unwrap();