Explorar el Código

Initial working parser for verses.

Steve Thielemann hace 2 meses
padre
commit
060db0ff68
Se han modificado 2 ficheros con 90 adiciones y 7 borrados
  1. 0 0
      fetch1.html
  2. 90 7
      src/main.rs

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 0 - 0
fetch1.html


+ 90 - 7
src/main.rs

@@ -1,16 +1,99 @@
-use reqwest::{Client, ClientBuilder};
+use reqwest;
+use std::{fs::File, path::Path, io::Read};
+
 use scraper::{Html, Selector};
 
-static APP_USER_AGENT: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0";
+static APP_USER_AGENT: &str =
+    "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0";
+static URL: &str = "https://www.bible.com/bible/59/GEN.1.ESV";
 
 fn main() {
     let client = reqwest::blocking::Client::builder()
-    .user_agent(APP_USER_AGENT)
-    .build().unwrap();
+        .user_agent(APP_USER_AGENT)
+        .build()
+        .unwrap();
 
-    let res = client.get("https://httpbin.org/anything").send().unwrap();
-    println!("anything: {}", res.text().unwrap());
+    // let res = client.get("https://httpbin.org/anything").send().unwrap();
+    // println!("anything: {}", res.text().unwrap());
 
-    // let res = client.get("https://www.bible.com/bible/59/GEN.1.ESV").send().unwrap();
+    let mut file = File::open(Path::new("fetch1.html")).unwrap();
+    let mut buffer = String::new();
+    let _ = file.read_to_string(&mut buffer);
+    drop(file);
+
+    /* 
+    let res = client.get(URL).send().unwrap();
+    let buffer = res.text().unwrap();
+    println!("{}", res.text().unwrap());
+    */
+
+    let document = scraper::Html::parse_document(&buffer);
+    let h1_selector = scraper::Selector::parse("h1").unwrap();
 
+    let h1 = document.select(&h1_selector).next().unwrap();
+    println!("h1 = {:?}", h1.text().collect::<Vec<_>>());
+
+    let span_selector = scraper::Selector::parse("span").unwrap();
+    for span in document.select(&span_selector) {
+        if span.attr("data-usfm").is_some() {
+            println!("span: {:?}", span.text().collect::<Vec<_>>());
+        }
+    }
+    // let res = client.get("https://www.bible.com/bible/59/GEN.1.ESV").send().unwrap();
 }
+
+/*
+Book/chapter: <h1>
+<h1>Genesis 1</h1>
+
+<div data-usfm="">, <span data-usfm="">
+
+<div data-usfm="GEN.1" class=
+                        "ChapterContent_chapter__uvbXo">
+                          <div class="ChapterContent_label__R2PLt">
+                            1
+                          </div>
+                          <div class="ChapterContent_s1__bNNaW">
+                            <span class=
+                            "ChapterContent_heading__xBDcs">The
+                            Creation of the World</span>
+                          </div>
+                          <div class="ChapterContent_p__dVKHb">
+                            <span data-usfm="GEN.1.1" class=
+                            "ChapterContent_verse__57FIw"><span class="ChapterContent_label__R2PLt">
+                            1</span><span class=
+                            "ChapterContent_content__RrUqA">In
+                            the</span> <span class=
+                            "ChapterContent_note__YlDW0 ChapterContent_x__tsTlk">
+                            <span class=
+                            "ChapterContent_label__R2PLt">#</span><span class="ChapterContent_body__O3qjr">Job
+                            38:4-7; Ps. 33:6; 136:5; Isa. 42:5;
+                            45:18; John 1:1-3; Acts 14:15; 17:24;
+                            Col. 1:16, 17; Heb. 1:10; 11:3; Rev.
+                            4:11</span></span><span class=
+                            "ChapterContent_content__RrUqA">beginning,
+                            God created the heavens and the
+                            earth.</span></span>
+
+   <span data-usfm=
+                            "GEN.1.2" class=
+                            "ChapterContent_verse__57FIw"><span class="ChapterContent_label__R2PLt">
+                            2</span><span class=
+                            "ChapterContent_content__RrUqA">The
+                            earth was</span> <span class=
+                            "ChapterContent_note__YlDW0 ChapterContent_x__tsTlk">
+                            <span class=
+                            "ChapterContent_label__R2PLt">#</span><span class="ChapterContent_body__O3qjr">Jer.
+                            4:23</span></span><span class=
+                            "ChapterContent_content__RrUqA">without
+                            form and void, and darkness was over
+                            the face of the deep. And the Spirit of
+                            God was hovering over the face of the
+                            waters.</span></span>                         
+Next page link:
+
+            <div class="[pointer-events:all]">
+              <a href="/bible/59/GEN.2.ESV">
+
+
+*/

Algunos archivos no se mostraron porque este commit modifica demasiados archivos