Преглед на файлове

Testing reqwest without (and with) tokio (memory).

Steve Thielemann преди 2 седмици
родител
ревизия
5f8d5dc4fa
променени са 3 файла, в които са добавени 116 реда и са изтрити 56 реда
  1. 1 0
      Cargo.toml
  2. 71 32
      src/cache.rs
  3. 44 24
      src/main.rs

+ 1 - 0
Cargo.toml

@@ -5,6 +5,7 @@ edition = "2021"
 
 [dependencies]
 anyhow = "1.0.95"
+# clap = { version = "4.5.29", features = ["derive", "wrap_help"] }
 clap = { version = "4.5.29", features = ["derive"] }
 reqwest = { version = "0.12.15", features = ["blocking", "brotli", "deflate", "gzip"] }
 scraper = "0.22.0"

+ 71 - 32
src/cache.rs

@@ -22,8 +22,13 @@ pub fn relative_to_absolute(base_url: &str, relative_href: &str) -> Result<Strin
     Ok(new_url.to_string())
 }
 
+/*
+Or maybe I should just use the replacements only, so I have some
+idea where the file came from?
+ */
+
 /// Extract filename from the end of a URL.
-/// 
+///
 /// If this doesn't have a usable path, convert url:
 /// * Remove https:// part
 /// * Replace / with -
@@ -101,16 +106,18 @@ pub fn load_headermap(filename: &str) -> Result<reqwest::header::HeaderMap> {
 /// Caching web calls
 ///
 /// Set the directory, and we're ready to make cached web calls.
+/// Since we're not storing the file in memory now, max_size isn't
+/// the concern it once was.
 pub struct Cache {
     /// Directory where cache is stored
     pub directory: PathBuf,
     // This is where we select async or blocking.
     /// Reqwest Client
     pub client: reqwest::blocking::Client,
-    /// Vector of content-types to accept
+    /// Vector of content-types to accept (empty=all)
     pub accept: Vec<String>,
-    /// Max size of content to download
-    pub max_size: u64,
+    /// Max size of content to download (default unlimited)
+    pub max_size: Option<u64>,
 }
 
 /// Status of fetch
@@ -180,16 +187,22 @@ impl Cache {
         Ok(Self {
             directory: dir,
             client: client,
-            accept: vec![
-                "text/html".to_string(),
-                "application/json".to_string(),
-                "text/xml".to_string(),
-                "application/x-gzip".to_string(),
-            ],
-            max_size: 256 * 1024 * 1024, // 256 MB
+            accept: vec![], // Accept all content-type.
+            max_size: None, // Accept any sized content.
+                            // Some(256 * 1024 * 1024), // 256 MB
         })
     }
 
+    #[allow(dead_code)] // Adds one accepted content-type; presumably unused so far (confirm).
+    pub fn add_content_type(&mut self, content_type: String) {
+        self.accept.push(content_type); // A non-empty list enables fetch()'s content-type check.
+    }
+
+    #[allow(dead_code)] // Sets the download size cap; presumably unused so far (confirm).
+    pub fn set_max_size(&mut self, size: u64) {
+        self.max_size = Some(size); // fetch() returns Status::TooBig when content_length exceeds it.
+    }
+
     /// Create safe filename from url for header/content files.
     pub fn url_to_basename(url: &str) -> Result<String> {
         let filename = if url.ends_with("/") {
@@ -285,6 +298,21 @@ impl Cache {
         Ok(result)
     }
 
+    /// Given a url, return an open handle to its cached content file.
+    ///
+    /// Reads from the cache directory only; nothing is fetched here.
+    /// Returns `None` when the url cannot be turned into a cache
+    /// basename, or when the cached file is absent or cannot be opened.
+    #[allow(dead_code)] // NOTE(review): presumably not called yet — confirm.
+    pub fn file(&self, url: &str) -> Option<std::fs::File> {
+        // The return type is already Option, so failures map to None
+        // instead of panicking through unwrap().
+        let basename = Self::url_to_basename(url).ok()?;
+        // Open directly: an exists() check followed by open() can still
+        // panic if the file disappears or is unreadable in between.
+        std::fs::File::open(self.directory.join(basename)).ok()
+    }
+
     // I'm not sure about using Result<Status> here...
     // It would allow for ? usage.
 
@@ -292,7 +320,7 @@ impl Cache {
     ///
     /// This returns Status, which could be Fetched or Cached copy (among other things).
     pub fn fetch(&self, url: &str) -> Status {
-        let mut base = self
+        let base = self
             .directory
             .as_path()
             .join(Self::url_to_basename(url).unwrap());
@@ -305,7 +333,12 @@ impl Cache {
         // base.set_extension("header");
         let mut header_file = base.clone();
         // Append .header to the filename.
-        let filename = header_file.file_name().unwrap().to_string_lossy().to_string() + ".header";
+        let filename = header_file
+            .file_name()
+            .unwrap()
+            .to_string_lossy()
+            .to_string()
+            + ".header";
         header_file.set_file_name(filename);
 
         if header_file.exists() {
@@ -340,27 +373,34 @@ impl Cache {
         if result.status() == 200 {
             // Success!
 
-            if let Some(len) = result.content_length() {
-                if len > self.max_size {
-                    // Is there a way to abort this safely?  Apparently yes! :D
-
-                    // let byte = Byte::from_u64(len);
-                    // let adjusted_byte = byte.get_appropriate_unit(UnitType::Binary);
-                    // println!("Too Big! {adjusted_byte:.2} {}", url);
-                    return Status::TooBig(len);
+            // Only check content_length size, if we have been
+            // given a max_size.
+            if let Some(max_size) = self.max_size {
+                if let Some(len) = result.content_length() {
+                    if len > max_size {
+                        // Is there a way to abort this safely?  Apparently yes! :D
+
+                        // let byte = Byte::from_u64(len);
+                        // let adjusted_byte = byte.get_appropriate_unit(UnitType::Binary);
+                        // println!("Too Big! {adjusted_byte:.2} {}", url);
+                        return Status::TooBig(len);
+                    }
                 }
             }
 
-            if let Some(content_type) = result.headers().get("content-type") {
-                // Check to see if accepted content.
-                let mut ct = content_type.to_str().unwrap();
-                let possible = content_type.to_str().unwrap().split_once(';');
-                if let Some((ct_part, _)) = possible {
-                    ct = ct_part;
-                }
-                if !self.accept.contains(&ct.to_string()) {
-                    // println!("Unacceptable content-type {} {}", ct, url);
-                    return Status::Unacceptable(ct.to_string());
+            // Only check acceptable content_types if given.
+            if !self.accept.is_empty() {
+                if let Some(content_type) = result.headers().get("content-type") {
+                    // Check to see if accepted content.
+                    let mut ct = content_type.to_str().unwrap();
+                    let possible = content_type.to_str().unwrap().split_once(';');
+                    if let Some((ct_part, _)) = possible {
+                        ct = ct_part;
+                    }
+                    if !self.accept.contains(&ct.to_string()) {
+                        // println!("Unacceptable content-type {} {}", ct, url);
+                        return Status::Unacceptable(ct.to_string());
+                    }
                 }
             }
 
@@ -396,6 +436,5 @@ impl Cache {
             println!("Error {} {}", result.status(), url);
             return Status::ErrorStatus(u16::from(result.status()));
         }
-        Status::Unacceptable("What?".to_string())
     }
 }

+ 44 - 24
src/main.rs

@@ -3,25 +3,37 @@ use cache::relative_to_absolute;
 use clap::{Parser, Subcommand};
 use std::env;
 use std::fs::File;
+use std::io::{BufRead, BufReader};
 use std::path::PathBuf;
-use std::io::{BufRead, BufReader}; // , Write, stdout};
 use std::process::Command;
 
 mod cache;
 
+// I'm not sure if reqwest::blocking's write_to is working right or not.
+// It seems like it might be storing the entire file in memory...
+// Which I don't want.  I might have to go back to tokio and chunk.
+
 // see reqwest/web-o/src/cache.rs for example cache
 // It restores reqwest::header::HeaderMap
 // (which allows duplicates... and ignores case on keys)
 
 #[derive(Parser)]
-#[command(about = "Go updater")]
+#[command(
+    about = "Go Updater",
+    long_about = "Go Updater
+
+This checks the https://go.dev/dl for newer versions of go.
+It depends upon the GOPATH and GOROOT environment variables being set.
+
+This can't update a package manager installed version of go (permissions)."
+)]
 struct Cli {
     /// Cache directory path
-    #[arg(default_value = "cache")]
+    #[arg(short, long, default_value = "cache")]
     cache: PathBuf,
 
     #[command(subcommand)]
-    command: Option<Commands>,
+    command: Commands, // command: Option<Commands>,
 }
 
 #[derive(Subcommand)]
@@ -44,7 +56,7 @@ fn find_go_version() -> Result<String> {
 }
 
 /// Locate the go binary
-/// 
+///
 /// This is redundant, it should be located via GO_PATH.
 #[allow(dead_code)]
 #[must_use]
@@ -88,7 +100,7 @@ and grab the section of td's.  class=filename has a href, last has SHA256.
 
  */
 
-/* 
+/*
 fn download_and_save(url: &str, filename: &str) -> Result<()> {
     let client = reqwest::blocking::Client::builder()
         .user_agent(APP_USER_AGENT)
@@ -127,9 +139,9 @@ fn version_from_url(url: &str, arch: &str) -> Option<String> {
 #[must_use]
 /// Get go version from `go version` output.
 fn version_from_go(text: &str) -> Option<String> {
-    let parts : Vec<&str> = text.split(' ').collect();
+    let parts: Vec<&str> = text.split(' ').collect();
     if parts.len() == 4 {
-        return Some(parts[2].to_string().replace("go", ""))
+        return Some(parts[2].to_string().replace("go", ""));
     }
     None
 }
@@ -137,19 +149,19 @@ fn version_from_go(text: &str) -> Option<String> {
 /// Return just the href="<return this part>".
 #[must_use]
 fn just_href(link: &str) -> Result<String> {
-    let parts  = link.split_once("href=\"").unwrap_or(("","")).1;
-    let href  = parts.split_once("\"").unwrap_or(("", "")).0;
+    let parts = link.split_once("href=\"").unwrap_or(("", "")).1;
+    let href = parts.split_once("\"").unwrap_or(("", "")).0;
     if !href.is_empty() {
-            return Ok(href.to_string());
+        return Ok(href.to_string());
     }
     bail!("Unable to locate href");
 }
 
 /// Find a href link for given arch (architecture)
-/// 
+///
 /// Look for <a class="download" href="
 #[must_use]
-fn find_arch_link(arch: &str, fp:&File) -> Result<String> {
+fn find_arch_link(arch: &str, fp: &File) -> Result<String> {
     let reader = BufReader::new(fp);
     for line in reader.lines() {
         if let Ok(line) = line {
@@ -165,10 +177,12 @@ fn find_arch_link(arch: &str, fp:&File) -> Result<String> {
 }
 
 /// find_link for given arch (architecture)
-/// 
+///
 /// Look for <a class="download" href=""
 #[must_use]
-fn find_link(arch: &str) -> Result<String> { // , Box<dyn Error>> {
+#[deprecated]
+fn find_link(arch: &str) -> Result<String> {
+    // , Box<dyn Error>> {
     let fp = File::open(GO_FILE)?;
     let reader = BufReader::new(fp);
     for line in reader.lines() {
@@ -215,7 +229,7 @@ fn main() -> Result<()> {
     let mut arch = parts.last().unwrap().to_string();
     arch = arch.replace("/", "-");
 
-    /* 
+    /*
     println!("GO_PATH  {}", go_path);
     println!("GO_ROOT  {}", go_root);
     println!("version: {}", go_version);
@@ -229,7 +243,7 @@ fn main() -> Result<()> {
     // Get go version and path
 
     match &cli.command {
-        Some(Commands::Update {}) => {
+        Commands::Update {} => {
             let status = cache.fetch(GO_URL);
 
             // Check to see if file already exists AND
@@ -247,6 +261,7 @@ fn main() -> Result<()> {
                         if let Some(latest) = latest_version {
                             println!("Version: {} [have {}]", latest, version);
                             if version != latest {
+                                println!("Downloading newer version...");
                                 let latest_status = cache.fetch(&abs);
                                 println!("Latest: {:?}", latest_status);
                             }
@@ -254,7 +269,7 @@ fn main() -> Result<()> {
                             println!("Finding version failed for string: [{}]", abs);
                         }
                     }
-                },
+                }
                 cache::Status::Cached(fp) => {
                     println!("(from cache)"); // I wish I could see this.
 
@@ -269,18 +284,23 @@ fn main() -> Result<()> {
                 }
             }
         }
-        Some(Commands::Info {}) => {
+        Commands::Info {} => {
             println!("GO_PATH  {}", go_path);
             println!("GO_ROOT  {}", go_root);
             println!("go ver:  {}", go_version);
             println!("version: {}", version);
             println!("where:   {}", go_where);
             println!("arch:    {}", arch);
-        }
-        None => {
-            // Display help.
-            let _show_help: Cli = Cli::parse_from(["--help"]);
-        }
+        } /*
+          None => {
+              // Display help.
+              println!("No option selected.  Oops.");
+              cli
+              // Why isn't this working?
+              let _show_help: Cli = Cli::parse_from(["--help"]);  // WHAT?
+              println!("... DERP?");
+          }
+          */
     }
     Ok(())
 }