12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811 |
- use std::fs::{File, create_dir_all, read_dir, remove_file};
- use std::io::{BufRead, BufReader, Write};
- use std::path::PathBuf;
- use std::result::Result;
- use std::time::{Duration, SystemTime};
- use url::Url;
- use std::error::Error as Errorr;
- use std::fmt;
- #[deny(missing_docs)]
- #[must_use]
- pub fn relative_to_absolute(
- base_url: &str,
- relative_href: &str,
- ) -> Result<String, url::ParseError> {
- let base_url = Url::parse(base_url)?;
- let new_url = base_url.join(relative_href)?;
- Ok(new_url.to_string())
- }
- pub fn save_headermap(
- filename: &str,
- url: &str,
- header: &reqwest::header::HeaderMap,
- ) -> Result<(), std::io::Error> {
- let mut fp = File::create(filename)?;
- fp.write_all(format!("url: {}\n", url).as_bytes())?;
- for (key, value) in header.iter() {
- if let Ok(value) = value.to_str() {
- fp.write_all(format!("{}: {}\n", key, value).as_bytes())?;
- }
- }
- Ok(())
- }
- pub fn load_headermap(filename: &str) -> Result<reqwest::header::HeaderMap, std::io::Error> {
- let fp = File::open(filename)?;
- let mut buffer = BufReader::new(fp);
- let mut line = String::new();
- let mut header = reqwest::header::HeaderMap::new();
- loop {
- if buffer.read_line(&mut line).unwrap() == 0 {
- break;
- };
- let temp = line.trim_end();
- if let Some(parts) = temp.split_once(": ") {
- let head = reqwest::header::HeaderName::from_bytes(parts.0.as_bytes()).unwrap();
- if let Ok(value) = reqwest::header::HeaderValue::from_str(&parts.1) {
- header.insert(head, value);
- }
- }
- line.clear();
- }
- Ok(header)
- }
- pub struct Cache {
-
- pub directory: PathBuf,
-
-
- pub client: reqwest::blocking::Client,
-
- pub accept: Vec<String>,
-
- pub max_size: Option<u64>,
- }
/// Outcome of a successful cache fetch; both variants carry the path of the cached file.
#[allow(dead_code)]
#[derive(Debug)]
#[repr(u8)]
pub enum Status {
    /// The content was downloaded and written to the contained path.
    Fetched(PathBuf),

    /// The server reported the content as unchanged; the contained path is the cached copy.
    Cached(PathBuf),
}

impl Status {
    /// Returns the path of the cached file, whichever variant this is.
    pub fn download_path(&self) -> &PathBuf {
        let (Status::Fetched(location) | Status::Cached(location)) = self;
        location
    }
}
- #[derive(Debug)]
- pub enum Error {
-
- ReqwestError(reqwest::Error),
- IOError(std::io::Error),
-
- Unacceptable(String),
-
- TooBig(u64),
-
- HttpErrorStatus(u16),
- }
- impl From<std::io::Error> for Error {
- fn from(e: std::io::Error) -> Self {
- Self::IOError(e)
- }
- }
- impl From<reqwest::Error> for Error {
- fn from(e: reqwest::Error) -> Self {
- Self::ReqwestError(e)
- }
- }
- impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- match self {
- Error::ReqwestError(e) => write!(f, "ReqwestError: {:?}", e),
- Error::IOError(e) => write!(f, "IOError: {:?}", e),
- Error::Unacceptable(ct) => write!(f, "Content-Type {} not allowed", ct),
- Error::TooBig(size) => write!(f, "Content-Size {} too big", size),
- Error::HttpErrorStatus(status) => write!(f, "Status Code: {}", status),
- }
- }
- }
- impl Errorr for Error {}
- static APP_USER_AGENT: &str = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"),);
- static HEADER_EXT: &str = ".header";
- impl Cache {
-
- pub fn new(dir: PathBuf, useragent: Option<&str>) -> Result<Self, Error> {
-
- let path = dir.as_path();
- if path.exists() {
- if !path.is_dir() {
-
- return Err(Error::IOError(std::io::Error::new(
- std::io::ErrorKind::Other,
- format!(
- "Can't create Cache dir {}, it already exists.",
- dir.display()
- ),
- )));
- }
- } else {
- match create_dir_all(path) {
- Err(e) => {
- return Err(Error::IOError(e));
- }
- Ok(_) => {}
- }
- }
- let user_agent = if let Some(ua) = useragent {
- ua
- } else {
- APP_USER_AGENT
- };
-
- match reqwest::blocking::Client::builder()
- .user_agent(user_agent)
- .build()
- {
- Ok(client) => {
- Ok(Self {
- directory: dir,
- client: client,
- accept: vec![],
- max_size: None,
- })
- }
- Err(e) => {
-
- return Err(Error::ReqwestError(e));
- }
- }
- }
- #[allow(dead_code)]
- pub fn add_content_type(&mut self, content_type: String) {
- self.accept.push(content_type);
- }
- #[allow(dead_code)]
- pub fn clear_content_type(&mut self) {
- self.accept.clear();
- }
- #[allow(dead_code)]
- pub fn set_max_size(&mut self, size: u64) {
- self.max_size = Some(size);
- }
- #[allow(dead_code)]
- pub fn clear_max_size(&mut self) {
- self.max_size = None;
- }
-
/// Derives the cache file name for `url`.
///
/// The text after the last `/` is used when it is a usable file name. Otherwise (the URL
/// ends in `/`, contains no `/`, or its last segment is `.` or `..`) the whole URL is
/// flattened instead: the `https://` / `http://` scheme text is removed, every `/` and
/// `.` becomes `-`, and one trailing `-` is dropped.
///
/// `.` and `..` are rejected as file names because joining them onto the cache directory
/// would resolve to the directory itself or its parent rather than to a file inside it.
pub fn url_to_basename(url: &str) -> String {
    let last_segment = match url.rsplit_once('/') {
        Some((_, segment)) => segment,
        None => "",
    };
    let usable = !last_segment.is_empty() && last_segment != "." && last_segment != "..";
    if usable {
        return last_segment.to_string();
    }

    // No usable file name: build one from the host and path.
    let mut flattened: String = url
        .replace("https://", "")
        .replace("http://", "")
        .chars()
        .map(|c| if c == '/' || c == '.' { '-' } else { c })
        .collect();
    if flattened.ends_with('-') {
        flattened.pop();
    }
    flattened
}
-
-
-
-
- #[allow(dead_code)]
- pub fn expire(&self, age: Duration) -> Result<bool, Error> {
- let now = SystemTime::now();
- let mut result: bool = false;
- for file in read_dir(self.directory.as_path())? {
- let file = file?;
- if let Ok(d) = file.metadata() {
- if d.is_file() {
-
- let filename = String::from(file.file_name().to_str().unwrap());
- if filename.ends_with(HEADER_EXT) {
-
- if let Ok(modify) = d.modified() {
- if let Ok(delta) = now.duration_since(modify) {
-
- if delta > age {
-
- let mut filepath = self.directory.join(filename);
- let r = remove_file(&filepath);
- if let Err(e) = r {
- println!("RemoveFile {:?}: {}", filepath, e);
- }
-
-
- Self::remove_from_filename(&mut filepath);
-
- let r = remove_file(&filepath);
- if let Err(e) = r {
- println!("RemoveFile {:?}: {}", filepath, e);
- }
- result = true;
- }
- }
- }
- }
-
- }
- }
- }
- Ok(result)
- }
-
-
-
-
- pub fn filename_for_url(&self, url: &str) -> PathBuf {
- self.directory.as_path().join(Self::url_to_basename(url))
- }
-
-
-
- #[allow(dead_code)]
- pub fn file(&self, url: &str) -> Option<File> {
- let base = self.filename_for_url(url);
-
- if base.exists() {
- return Some(File::open(base).unwrap());
- }
- None
- }
-
/// Returns the final component of `path` as a `String` (lossily converted to UTF-8).
///
/// # Panics
///
/// Panics if `path` has no file name.
#[allow(dead_code)]
#[must_use]
fn pathbuf_filename(path: &PathBuf) -> String {
    let last_component = path.file_name().unwrap();
    last_component.to_string_lossy().into_owned()
}
-
-
-
-
/// Appends `append` to the final component of `path`, in place.
///
/// For example `cache/test.html` with `".header"` becomes `cache/test.html.header`.
/// The existing name is kept as an `OsString`, so a name that is not valid UTF-8 is
/// extended as-is rather than being rewritten by a lossy conversion.
///
/// # Panics
///
/// Panics if `path` has no file name.
fn append_to_filename(path: &mut PathBuf, append: &str) {
    let mut filename = path
        .file_name()
        .expect("path must end in a file name")
        .to_os_string();
    filename.push(append);
    path.set_file_name(filename);
}
-
-
-
- fn remove_from_filename(path: &mut PathBuf) {
- let filename = Self::pathbuf_filename(path);
- if let Some(parts) = filename.rsplit_once(".") {
- path.set_file_name(parts.0);
- } else {
- panic!(
- "Unable to locate the trailing extension . from: {}",
- path.display()
- );
- }
- }
-
-
-
- #[allow(dead_code)]
- pub fn fetch_nocache(&self, url: &str) -> Result<Status, Error> {
- let mut base = self.filename_for_url(url);
- Self::append_to_filename(&mut base, HEADER_EXT);
- if base.exists() {
- match remove_file(&base) {
- Err(e) => {
-
- return Err(Error::IOError(e));
- }
- Ok(_) => {}
- }
- }
- return self.fetch(url);
- }
-
-
-
-
-
- #[must_use]
- pub fn fetch(&self, url: &str) -> Result<Status, Error> {
- let base = self.filename_for_url(url);
-
- let mut builder = self.client.get(url);
-
-
- let mut header_file = base.clone();
- Self::append_to_filename(&mut header_file, HEADER_EXT);
- if header_file.exists() {
-
- match load_headermap(header_file.to_str().unwrap()) {
- Ok(old_header) => {
-
- if let Some(lastmod) = old_header.get("Last-Modified") {
- builder = builder.header("If-Modified-Since", lastmod);
- } else if let Some(date) = old_header.get("Date") {
-
- builder = builder.header("If-Modified-Since", date);
- }
- if let Some(etag) = old_header.get("etag") {
- builder = builder.header("If-None-Match", etag);
- }
- }
- Err(e) => {
- return Err(Error::IOError(e));
- }
- }
- };
- match builder.send() {
- Ok(mut result) => {
- if result.status() == 304 {
-
- return Ok(Status::Cached(base));
- }
-
- if result.status() == 200 {
-
-
-
-
-
-
-
-
- if let Some(max_size) = self.max_size {
- if let Some(len) = result.content_length() {
- if len > max_size {
-
-
-
-
- return Err(Error::TooBig(len));
- }
- }
- }
-
- if !self.accept.is_empty() {
- if let Some(content_type) = result.headers().get("content-type") {
-
- let mut ct = content_type.to_str().unwrap();
- let possible = content_type.to_str().unwrap().split_once(';');
- if let Some((ct_part, _)) = possible {
- ct = ct_part;
- }
- if !self.accept.contains(&ct.to_string()) {
-
- return Err(Error::Unacceptable(ct.to_string()));
- }
- }
- }
- match save_headermap(header_file.to_str().unwrap(), url, result.headers()) {
- Err(e) => {
- return Err(Error::IOError(e));
- }
- Ok(()) => {}
- }
- match File::create(base.to_str().unwrap()) {
- Ok(mut fp) => match result.copy_to(&mut fp) {
- Ok(_) => {}
- Err(e) => {
- return Err(Error::ReqwestError(e));
- }
- },
- Err(e) => {
- return Err(Error::IOError(e));
- }
- }
-
-
- return Ok(Status::Fetched(base));
- } else {
-
-
- return Err(Error::HttpErrorStatus(u16::from(result.status())));
- }
- }
- Err(e) => {
- return Err(Error::ReqwestError(e));
- }
- }
- }
- }
- #[cfg(test)]
- mod tests {
- use super::*;
- use std::collections::HashMap;
- use testdir::testdir;
/// Checks `relative_to_absolute` against rooted, sibling, parent-relative and
/// already-absolute hrefs.
#[test]
fn relative_test() {
    // ((base URL, href), expected absolute URL)
    let cases = [
        (
            ("http://meow.org/rabbit", "/llama/index.html"),
            "http://meow.org/llama/index.html",
        ),
        (
            ("https://example.com/dir/index.html", "about.html"),
            "https://example.com/dir/about.html",
        ),
        (
            ("https://example.com/dir/index.html", "../and/about.html"),
            "https://example.com/and/about.html",
        ),
        (
            (
                "https://here.com/dir/index.html",
                "http://there.com/about.html",
            ),
            "http://there.com/about.html",
        ),
    ];
    for ((base, rel), expected) in cases {
        match relative_to_absolute(base, rel) {
            Ok(abs) => assert_eq!(abs, expected, "Base {}, Rel {} => {}", base, rel, expected),
            Err(_) => panic!("Failed {} + {} => {}", base, rel, expected),
        }
    }
}
/// Checks URL-to-file-name mapping and the append/remove file-name helpers.
#[test]
fn url_to_filename_test() {
    let cache_dir = testdir!().join("cache");
    let cache = Cache::new(cache_dir.clone(), None).unwrap();

    // URL -> expected base name inside the cache directory.
    let expected_basenames: HashMap<&str, &str> = HashMap::from([
        ("https://go.dev/dl/go1.23.45.tar.gz", "go1.23.45.tar.gz"),
        ("https://go.dev/dl", "dl"),
        ("https://go.dev/dl/", "go-dev-dl"),
    ]);
    for (url, base) in expected_basenames {
        assert_eq!(base, Cache::url_to_basename(url), "{} -> {}", url, base);

        let expected_path = cache_dir.join(base);
        assert_eq!(
            cache.filename_for_url(url).as_os_str(),
            expected_path.as_os_str(),
            "{} -> {}",
            url,
            base
        );
    }

    // Appending the header suffix and stripping it again must round-trip.
    for filename in ["go1.23.45.tar.gz", "test.html"] {
        let with_suffix = format!("{}{}", filename, HEADER_EXT);
        let mut path = cache_dir.clone();
        path.set_file_name(filename);

        Cache::append_to_filename(&mut path, HEADER_EXT);
        assert_eq!(
            path.file_name().unwrap().to_string_lossy(),
            with_suffix.as_str(),
            "{} {}",
            filename,
            HEADER_EXT
        );

        Cache::remove_from_filename(&mut path);
        assert_eq!(
            path.file_name().unwrap().to_string_lossy(),
            filename,
            "{}",
            filename
        );
    }
}
/// Fetches a page from httpbin.org and checks that the content file and its header
/// file land where expected. Requires network access.
#[test]
#[cfg(not(feature = "local-httpbin"))]
fn cache_fetch() {
    let cache_dir = testdir!().join("cache");
    let expected_body = cache_dir.join("anything");
    let expected_header = cache_dir.join("anything.header");
    let cache = Cache::new(cache_dir, None).unwrap();

    match cache.fetch("https://httpbin.org/anything") {
        Ok(Status::Fetched(f)) => {
            assert!(f.exists(), "Cache file exists.");
            assert_eq!(f, expected_body, "Cache path is what we were expecting.");

            let mut header_file = expected_body.clone();
            Cache::append_to_filename(&mut header_file, HEADER_EXT);
            assert!(header_file.exists(), "Cache header file exists.");
            assert_eq!(
                header_file, expected_header,
                "Cache header path is what we expected."
            );
        }
        Ok(other) => panic!("Cache Status is not Status::Fetched, is: {:?}", other),
        failure @ Err(_) => panic!("cache.fetch: {:?}", failure),
    }
}
-
/// Checks error reporting against a local httpbin: an HTTP error status is surfaced as
/// `Error::HttpErrorStatus`, and a refused connection as some error.
#[test]
#[cfg(feature = "local-httpbin")]
fn call_local() {
    let cache = Cache::new(testdir!().join("cache"), None).unwrap();

    match cache.fetch("http://127.0.0.1/status/418") {
        Err(Error::HttpErrorStatus(code)) => assert_eq!(code, 418),
        Err(_) => panic!("Not an ErrorStatus"),
        // Reaching this arm means the request succeeded, which is the failure here.
        Ok(status) => panic!("Expected HTTP 418 error, got: {status:?}"),
    }

    let r = cache.fetch("http://127.0.0.1:1024");
    assert!(r.is_err(), "Confirm connection error");
}
-
/// Checks revalidation against a local httpbin: the first request for an ETag-bearing
/// URL downloads it, the second is answered from the cache.
#[test]
#[cfg(feature = "local-httpbin")]
fn cache_local() {
    let cache = Cache::new(testdir!().join("cache"), None).unwrap();
    let etag_url = "http://127.0.0.1/etag/meow";

    let first = cache.fetch(etag_url);
    match first {
        Ok(Status::Fetched(_)) => {}
        Ok(_) => panic!("Expected Status::Fetched on 1st request."),
        Err(_) => panic!("Unexpected error: {first:?}"),
    }

    let second = cache.fetch(etag_url);
    match second {
        Ok(Status::Cached(_)) => {}
        Ok(_) => panic!("Expected Status::Cached on 2nd request."),
        Err(_) => panic!("Unexpected error: {second:?}"),
    }
}
- }
|