|
@@ -1,10 +1,7 @@
|
|
|
// use sha256;
|
|
|
use anyhow::{Context, Result, bail};
|
|
|
-use std::fs;
|
|
|
-use std::fs::remove_file;
|
|
|
-use std::io::BufRead;
|
|
|
-use std::io::BufReader;
|
|
|
-use std::io::Write;
|
|
|
+use std::fs::{File, remove_file, read_dir, create_dir_all};
|
|
|
+use std::io::{BufRead, BufReader, Write};
|
|
|
use std::path::PathBuf;
|
|
|
use std::time::{Duration, SystemTime};
|
|
|
use url::Url;
|
|
@@ -13,6 +10,8 @@ use url::Url;
|
|
|
// #[warn(missing_docs)]
|
|
|
|
|
|
/// Convert relate to absolute
|
|
|
+///
|
|
|
+/// This can fail if Url is unable to parse, or Url is unable to join.
|
|
|
#[must_use]
|
|
|
pub fn relative_to_absolute(base_url: &str, relative_href: &str) -> Result<String> {
|
|
|
let base_url = Url::parse(base_url).context(format!("Url::parse({})", base_url))?;
|
|
@@ -25,8 +24,11 @@ pub fn relative_to_absolute(base_url: &str, relative_href: &str) -> Result<Strin
|
|
|
/*
|
|
|
Or maybe I should just use the replacements only, so I have some
|
|
|
idea where the file came from?
|
|
|
+
|
|
|
+It is nice having a clean filename like go1.24.1.tar.gz in the cache directory.
|
|
|
*/
|
|
|
|
|
|
+/*
|
|
|
/// Extract filename from the end of a URL.
|
|
|
///
|
|
|
/// If this doesn't have a usable path, convert url:
|
|
@@ -51,6 +53,7 @@ pub fn filename_from_url(url: &str) -> Result<String> {
|
|
|
}
|
|
|
Ok(filename.to_string())
|
|
|
}
|
|
|
+*/
|
|
|
|
|
|
// Display a number as K, M, G, T, etc.
|
|
|
// pub fn display_bytes(size: u64) -> String { }
|
|
@@ -59,12 +62,16 @@ pub fn filename_from_url(url: &str) -> Result<String> {
|
|
|
///
|
|
|
/// This also stores the url in the file, so I know what URL was called for
|
|
|
/// this reqwest.
|
|
|
+///
|
|
|
+/// It has each item on a single line:
|
|
|
+/// header: value
|
|
|
+/// The first line will be `url:` (which is not part of the original header).
|
|
|
pub fn save_headermap(
|
|
|
filename: &str,
|
|
|
url: &str,
|
|
|
header: &reqwest::header::HeaderMap,
|
|
|
) -> Result<()> {
|
|
|
- let mut fp = std::fs::File::create(filename)?;
|
|
|
+ let mut fp = File::create(filename)?;
|
|
|
|
|
|
fp.write_all(format!("url: {}\n", url).as_bytes())?;
|
|
|
for (key, value) in header.iter() {
|
|
@@ -79,7 +86,7 @@ pub fn save_headermap(
|
|
|
///
|
|
|
/// This will have the url of the original call in the "url" section.
|
|
|
pub fn load_headermap(filename: &str) -> Result<reqwest::header::HeaderMap> {
|
|
|
- let fp = std::fs::File::open(filename)?;
|
|
|
+ let fp = File::open(filename)?;
|
|
|
let mut buffer = BufReader::new(fp);
|
|
|
let mut line = String::new();
|
|
|
let mut header = reqwest::header::HeaderMap::new();
|
|
@@ -111,7 +118,7 @@ pub fn load_headermap(filename: &str) -> Result<reqwest::header::HeaderMap> {
|
|
|
pub struct Cache {
|
|
|
/// Directory where cache is stored
|
|
|
pub directory: PathBuf,
|
|
|
- // This is where we select async or blocking.
|
|
|
+ // *This is where we would select async or blocking.*
|
|
|
/// Reqwest Client
|
|
|
pub client: reqwest::blocking::Client,
|
|
|
/// Vector of content-types to accept (empty=all)
|
|
@@ -120,15 +127,18 @@ pub struct Cache {
|
|
|
pub max_size: Option<u64>,
|
|
|
}
|
|
|
|
|
|
+// Should I also have std::io::Errors in here as well?
|
|
|
+// I can have File IO errors.
|
|
|
+
|
|
|
/// Status of fetch
|
|
|
#[allow(dead_code)]
|
|
|
#[derive(Debug)]
|
|
|
#[repr(u8)]
|
|
|
pub enum Status {
|
|
|
/// File was downloaded.
|
|
|
- Fetched(std::fs::File) = 0,
|
|
|
+ Fetched(PathBuf),
|
|
|
/// File was retrieved from cache.
|
|
|
- Cached(std::fs::File),
|
|
|
+ Cached(PathBuf),
|
|
|
/// Reqwest error (unable to connect)
|
|
|
Error(reqwest::Error),
|
|
|
/// Content-Type wasn't allowed, see Cache.accept.
|
|
@@ -154,6 +164,7 @@ application/javascript
|
|
|
|
|
|
// If nothing is given for useragent, we default to the application name and version.
|
|
|
static APP_USER_AGENT: &str = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"),);
|
|
|
+static HEADER_EXT: &str = ".header";
|
|
|
|
|
|
impl Cache {
|
|
|
/// Construct Cache using given directory for caching, and useragent.
|
|
@@ -170,7 +181,7 @@ impl Cache {
|
|
|
);
|
|
|
}
|
|
|
} else {
|
|
|
- fs::create_dir_all(path).context(format!("Cache dir {}", path.display()))?;
|
|
|
+ create_dir_all(path).context(format!("Create cache dir {}", path.display()))?;
|
|
|
}
|
|
|
|
|
|
let user_agent = if let Some(ua) = useragent {
|
|
@@ -236,11 +247,12 @@ impl Cache {
|
|
|
///
|
|
|
/// Use DirEntry.modified, since it updates when a file is freshened/downloaded.
|
|
|
/// DirEntry.created isn't updated when file is rewritten.
|
|
|
+ #[allow(dead_code)]
|
|
|
pub fn expire(&self, age: Duration) -> Result<bool> {
|
|
|
let now = SystemTime::now();
|
|
|
let mut result: bool = false;
|
|
|
|
|
|
- for file in std::fs::read_dir(self.directory.as_path())? {
|
|
|
+ for file in read_dir(self.directory.as_path())? {
|
|
|
let file = file?;
|
|
|
if let Ok(d) = file.metadata() {
|
|
|
if d.is_file() {
|
|
@@ -248,7 +260,8 @@ impl Cache {
|
|
|
|
|
|
let filename = String::from(file.file_name().to_str().unwrap());
|
|
|
|
|
|
- if filename.ends_with(".header") {
|
|
|
+ if filename.ends_with(HEADER_EXT) {
|
|
|
+ // This is a header cache file...
|
|
|
if let Ok(modify) = d.modified() {
|
|
|
if let Ok(delta) = now.duration_since(modify) {
|
|
|
// println!("expire {} = modified {}", filename, delta.as_secs());
|
|
@@ -260,7 +273,9 @@ impl Cache {
|
|
|
println!("RemoveFile {:?}: {}", filepath, e);
|
|
|
}
|
|
|
// Also delete .content !
|
|
|
- filepath.set_extension("content");
|
|
|
+ // Which is trickier to find now...
|
|
|
+ Self::remove_from_filename(&mut filepath);
|
|
|
+ // filepath.set_extension("content");
|
|
|
let r = remove_file(&filepath);
|
|
|
if let Err(e) = r {
|
|
|
println!("RemoveFile {:?}: {}", filepath, e);
|
|
@@ -270,7 +285,7 @@ impl Cache {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
+ }
|
|
|
|
|
|
/*
|
|
|
if let Ok(access) = d.accessed() {
|
|
@@ -298,48 +313,101 @@ impl Cache {
|
|
|
Ok(result)
|
|
|
}
|
|
|
|
|
|
+ /// Given a url, return the filename
|
|
|
+ ///
|
|
|
+ /// The filename might not exist. It is only the filename
|
|
|
+ /// that would be used for the given url.
|
|
|
+ pub fn filename_for_url(&self, url: &str) -> PathBuf {
|
|
|
+ self.directory
|
|
|
+ .as_path()
|
|
|
+ .join(Self::url_to_basename(url).unwrap())
|
|
|
+ }
|
|
|
+
|
|
|
/// Given a url, return an open file
|
|
|
///
|
|
|
/// Reading from cache.
|
|
|
#[allow(dead_code)]
|
|
|
- pub fn file(&self, url: &str) -> Option<std::fs::File> {
|
|
|
+ pub fn file(&self, url: &str) -> Option<File> {
|
|
|
+ let base = self.filename_for_url(url);
|
|
|
+ /*
|
|
|
let base = self
|
|
|
.directory
|
|
|
.as_path()
|
|
|
.join(Self::url_to_basename(url).unwrap());
|
|
|
+ */
|
|
|
if base.exists() {
|
|
|
- return Some(std::fs::File::open(base).unwrap());
|
|
|
+ return Some(File::open(base).unwrap());
|
|
|
}
|
|
|
None
|
|
|
}
|
|
|
|
|
|
+ /// Return filename from pathbuf as String
|
|
|
+ #[allow(dead_code)]
|
|
|
+ #[must_use]
|
|
|
+ fn pathbuf_filename(path: &PathBuf) -> String {
|
|
|
+ path.file_name().unwrap().to_string_lossy().to_string()
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Add to the PathBuf filename
|
|
|
+ ///
|
|
|
+    /// This is different than PathBuf::set_extension
|
|
|
+ /// which replaces everything.
|
|
|
+ fn append_to_filename(path: &mut PathBuf, append: &str) {
|
|
|
+ // Append to the filename.
|
|
|
+ let filename = path.file_name().unwrap().to_string_lossy().to_string() + append;
|
|
|
+ path.set_file_name(filename);
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Remove an extension from the filename.
|
|
|
+ ///
|
|
|
+    /// Given something.tar.gz.header, return something.tar.gz
|
|
|
+ fn remove_from_filename(path: &mut PathBuf) {
|
|
|
+ let filename = Self::pathbuf_filename(path);
|
|
|
+ if let Some(parts) = filename.rsplit_once(".") {
|
|
|
+ path.set_file_name(parts.0);
|
|
|
+ } else {
|
|
|
+ panic!("Unable to locate the trailing extension . from: {}", path.display());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /// Fetch, without using the cache.
|
|
|
+ ///
|
|
|
+ /// This deletes the .header cache file, which forces a fetch.
|
|
|
+ #[allow(dead_code)]
|
|
|
+ pub fn fetch_nocache(&self, url: &str) -> Status {
|
|
|
+ let mut base = self.filename_for_url(url);
|
|
|
+ Self::append_to_filename(&mut base, HEADER_EXT);
|
|
|
+ if base.exists() {
|
|
|
+ let r = remove_file(&base);
|
|
|
+ // unlink failed - panic.
|
|
|
+ if r.is_err() {
|
|
|
+ panic!("Unlink {base:?}: {r:?}");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return self.fetch(url);
|
|
|
+ }
|
|
|
+
|
|
|
// I'm not sure about using Result<Status> here...
|
|
|
// It would allow for ? usage.
|
|
|
|
|
|
/// Fetch the URL from the web
|
|
|
///
|
|
|
/// This returns Status, which could be Fetched or Cached copy (among other things).
|
|
|
+ #[must_use]
|
|
|
pub fn fetch(&self, url: &str) -> Status {
|
|
|
+ let base = self.filename_for_url(url);
|
|
|
+ /*
|
|
|
let base = self
|
|
|
.directory
|
|
|
.as_path()
|
|
|
.join(Self::url_to_basename(url).unwrap());
|
|
|
-
|
|
|
+ */
|
|
|
let mut builder = self.client.get(url);
|
|
|
// Don't send just yet!
|
|
|
// Set some headers to see if page content has changed.
|
|
|
|
|
|
- // This break things - it destroys the file extension (if given)
|
|
|
- // base.set_extension("header");
|
|
|
let mut header_file = base.clone();
|
|
|
- // Append .header to the filename.
|
|
|
- let filename = header_file
|
|
|
- .file_name()
|
|
|
- .unwrap()
|
|
|
- .to_string_lossy()
|
|
|
- .to_string()
|
|
|
- + ".header";
|
|
|
- header_file.set_file_name(filename);
|
|
|
+ Self::append_to_filename(&mut header_file, HEADER_EXT);
|
|
|
|
|
|
if header_file.exists() {
|
|
|
// Ok! We have existing information. Retrieve it.
|
|
@@ -362,17 +430,25 @@ impl Cache {
|
|
|
|
|
|
if result.status() == 304 {
|
|
|
// Cache hit!
|
|
|
-
|
|
|
+ return Status::Cached(base);
|
|
|
+ /*
|
|
|
// println!("Cache hit! 304! {}", url);
|
|
|
// base.set_extension("content");
|
|
|
- let fp = std::fs::File::open(base.to_str().unwrap()).unwrap();
|
|
|
+ let fp = File::open(base.to_str().unwrap()).unwrap();
|
|
|
return Status::Cached(fp);
|
|
|
+ */
|
|
|
}
|
|
|
|
|
|
// Ok! Success!
|
|
|
if result.status() == 200 {
|
|
|
// Success!
|
|
|
|
|
|
+ // When caching fails ―
|
|
|
+ //
|
|
|
+ // If content_length (from previous fetch) matches current?
|
|
|
+ // Could we assume it hasn't changed, and just use cache?
|
|
|
+ // Or would that be a big assumption?
|
|
|
+
|
|
|
// Only check content_length size, if we have been
|
|
|
// given a max_size.
|
|
|
if let Some(max_size) = self.max_size {
|
|
@@ -404,33 +480,24 @@ impl Cache {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- // This seems like a lot of work, just to add something.
|
|
|
- // Maybe I should just change the save_header to take the url?
|
|
|
- /*
|
|
|
- // Add URL to the header, so I know what I fetched...
|
|
|
- let mut header2 = result.headers().clone();
|
|
|
- let head = reqwest::header::HeaderName::from_bytes(b"URL").unwrap();
|
|
|
- if let Ok(value) = reqwest::header::HeaderValue::from_str(url) {
|
|
|
- header2.insert(head, value);
|
|
|
- }
|
|
|
- */
|
|
|
-
|
|
|
let r = save_headermap(header_file.to_str().unwrap(), url, result.headers());
|
|
|
if r.is_err() {
|
|
|
println!("save_headermap: {} {:?}", r.unwrap_err(), header_file);
|
|
|
}
|
|
|
|
|
|
// base.set_extension("content");
|
|
|
- let mut fp = std::fs::File::create(base.to_str().unwrap()).unwrap();
|
|
|
+ let mut fp = File::create(base.to_str().unwrap()).unwrap();
|
|
|
let _ = result.copy_to(&mut fp);
|
|
|
/*
|
|
|
while let Ok(Some(chunk)) = result.chunk().await {
|
|
|
let _ = fp.write(&chunk);
|
|
|
}
|
|
|
*/
|
|
|
-
|
|
|
- let fp = std::fs::File::open(base.to_str().unwrap()).unwrap();
|
|
|
+ return Status::Fetched(base);
|
|
|
+ /*
|
|
|
+ let fp = File::open(base.to_str().unwrap()).unwrap();
|
|
|
return Status::Fetched(fp);
|
|
|
+ */
|
|
|
} else {
|
|
|
// Status error?!
|
|
|
println!("Error {} {}", result.status(), url);
|
|
@@ -438,3 +505,39 @@ impl Cache {
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+#[cfg(test)]
|
|
|
+mod tests {
|
|
|
+ use super::*;
|
|
|
+ use testdir::testdir;
|
|
|
+
|
|
|
+ #[test]
|
|
|
+ fn cache_fetch() {
|
|
|
+ let mut dir = testdir!();
|
|
|
+ dir.push("cache");
|
|
|
+
|
|
|
+ // Make a copy of the cache directory PathBuf for verifying paths.
|
|
|
+ let mut t = dir.clone();
|
|
|
+
|
|
|
+ let cache = Cache::new(dir, None).unwrap();
|
|
|
+ let r = cache.fetch("https://httpbin.org/anything");
|
|
|
+
|
|
|
+ t.push("anything");
|
|
|
+
|
|
|
+ if let Status::Fetched(f) = r {
|
|
|
+ assert!(f.exists(), "Cache file exists.");
|
|
|
+ assert_eq!(f, t, "Cache path is what we were expecting.");
|
|
|
+ let mut header_file = t.clone();
|
|
|
+ Cache::append_to_filename(&mut header_file, HEADER_EXT);
|
|
|
+
|
|
|
+ assert!(header_file.exists(), "Cache header file exists.");
|
|
|
+ t.pop();
|
|
|
+ t.push("anything.header");
|
|
|
+ assert_eq!(header_file, t, "Cache header path is what we expected.");
|
|
|
+ } else {
|
|
|
+ panic!("Cache Status is not Status::Fetched, is: {:?}", r);
|
|
|
+ }
|
|
|
+
|
|
|
+ // println!("Dir: {:?}, Status: {:?}", t, r); // r has been partially moved.
|
|
|
+ }
|
|
|
+}
|