// use sha256;
use std::fs::{File, create_dir_all, read_dir, remove_file};
use std::io::{BufRead, BufReader, Write};
use std::path::PathBuf;
use std::result::Result;
use std::time::{Duration, SystemTime};
use url::Url;

// Error
use std::error::Error as Errorr;
use std::fmt;

#[deny(missing_docs)]
// #[warn(missing_docs)]
/// Convert a relative URL to an absolute one.
///
/// This can fail if Url is unable to parse, or Url is unable to join.
#[must_use]
pub fn relative_to_absolute(
    base_url: &str,
    relative_href: &str,
) -> Result<String, url::ParseError> {
    let base_url = Url::parse(base_url)?;
    let new_url = base_url.join(relative_href)?;
    Ok(new_url.to_string())
}

/// Save reqwest::header::HeaderMap to file.
///
/// This also stores the url in the file, so I know what URL was called for
/// this request.
///
/// It has each item on a single line:
///   header: value
/// The first line will be "url:" (which is not part of the original header).
pub fn save_headermap(
    filename: &str,
    url: &str,
    header: &reqwest::header::HeaderMap,
) -> Result<(), std::io::Error> {
    let mut fp = File::create(filename)?;
    fp.write_all(format!("url: {}\n", url).as_bytes())?;
    for (key, value) in header.iter() {
        if let Ok(value) = value.to_str() {
            fp.write_all(format!("{}: {}\n", key, value).as_bytes())?;
        }
    }
    Ok(())
}

/// Load reqwest::header::HeaderMap from file.
///
/// This will have the url of the original call in the "url" entry.
pub fn load_headermap(filename: &str) -> Result<reqwest::header::HeaderMap, std::io::Error> {
    let fp = File::open(filename)?;
    let mut buffer = BufReader::new(fp);
    let mut line = String::new();
    let mut header = reqwest::header::HeaderMap::new();

    loop {
        if buffer.read_line(&mut line)? == 0 {
            break;
        };
        let temp = line.trim_end();
        if let Some(parts) = temp.split_once(": ") {
            let head = reqwest::header::HeaderName::from_bytes(parts.0.as_bytes()).unwrap();
            if let Ok(value) = reqwest::header::HeaderValue::from_str(parts.1) {
                header.insert(head, value);
            }
        }
        line.clear();
    }
    Ok(header)
}

/// Caching web calls
///
/// Set the directory, and we're ready to make cached web calls.
/// Since we're not storing the file in memory now, max_size isn't
/// the concern it once was.
pub struct Cache {
    /// Directory where cache is stored
    pub directory: PathBuf,
    // *This is where we would select async or blocking.*
    /// Reqwest Client
    pub client: reqwest::blocking::Client,
    /// Vector of content-types to accept (empty = accept all)
    pub accept: Vec<String>,
    /// Max size of content to download (default unlimited)
    pub max_size: Option<u64>,
}

// Should I also have std::io::Errors in here as well?
// I can have File IO errors.
/// Status of a fetch
#[allow(dead_code)]
#[derive(Debug)]
#[repr(u8)]
pub enum Status {
    /// File was downloaded.
    Fetched(PathBuf),
    /// File was retrieved from cache.
    Cached(PathBuf),
}

impl Status {
    /// Return the downloaded path, for either variant.
    pub fn download_path(&self) -> &PathBuf {
        match self {
            Status::Fetched(path) | Status::Cached(path) => path,
        }
    }
}

/// Errors returned by the cache.
#[derive(Debug)]
pub enum Error {
    /// Reqwest error (unable to connect).
    ReqwestError(reqwest::Error),
    /// IO error (std::io::Error).
    IOError(std::io::Error),
    /// Content-Type wasn't allowed, see Cache.accept.
    Unacceptable(String), // Content-Type
    /// Content was too big, see Cache.max_size.
    TooBig(u64),
    /// HTTP Error/status code.
    HttpErrorStatus(u16),
}
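
// A hedged usage sketch, not part of the original module: one way a caller
// might branch on Status and the Error variants above. The helper name and the
// log messages are illustrative only.
#[allow(dead_code)]
fn example_handle_fetch(cache: &Cache, url: &str) {
    match cache.fetch(url) {
        Ok(status) => println!("body saved at {}", status.download_path().display()),
        Err(Error::TooBig(len)) => eprintln!("skipped {}: {} bytes exceeds max_size", url, len),
        Err(Error::Unacceptable(ct)) => eprintln!("skipped {}: content-type {}", url, ct),
        Err(e) => eprintln!("fetch of {} failed: {}", url, e),
    }
}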
// This allows ? to return cache::Error from std::io::Error (see expire).
impl From<std::io::Error> for Error {
    fn from(e: std::io::Error) -> Self {
        Self::IOError(e)
    }
}

impl From<reqwest::Error> for Error {
    fn from(e: reqwest::Error) -> Self {
        Self::ReqwestError(e)
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Error::ReqwestError(e) => write!(f, "ReqwestError: {:?}", e),
            Error::IOError(e) => write!(f, "IOError: {:?}", e),
            Error::Unacceptable(ct) => write!(f, "Content-Type {} not allowed", ct),
            Error::TooBig(size) => write!(f, "Content-Size {} too big", size),
            Error::HttpErrorStatus(status) => write!(f, "Status Code: {}", status),
        }
    }
}

// This made anyhow happy with my cache::Error.
impl Errorr for Error {}

/*
Some possible content-type values:
We're only interested in a few of these...

text/css
text/javascript
text/plain
image/jpeg
image/png
image/gif
application/xml
application/javascript
*/

// If nothing is given for useragent, we default to the application name and version.
static APP_USER_AGENT: &str = concat!(env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION"),);

static HEADER_EXT: &str = ".header";

impl Cache {
    /// Construct Cache using the given directory for caching, and useragent.
    pub fn new(dir: PathBuf, useragent: Option<&str>) -> Result<Self, Error> {
        // Verify the directory exists
        let path = dir.as_path();
        if path.exists() {
            if !path.is_dir() {
                // It exists, but it isn't a directory! What?!
                return Err(Error::IOError(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    format!(
                        "Can't use {} as the cache dir, it exists but is not a directory.",
                        dir.display()
                    ),
                )));
            }
        } else if let Err(e) = create_dir_all(path) {
            return Err(Error::IOError(e));
        }

        let user_agent = if let Some(ua) = useragent {
            ua
        } else {
            APP_USER_AGENT
        };

        // This is where we select async or blocking.
        match reqwest::blocking::Client::builder()
            .user_agent(user_agent)
            .build()
        {
            Ok(client) => Ok(Self {
                directory: dir,
                client,
                accept: vec![], // Accept all content-types.
                max_size: None, // Some(256 * 1024 * 1024), // 256 MB
            }),
            Err(e) => {
                // Client::builder error
                Err(Error::ReqwestError(e))
            }
        }
    }

    /// Add a content-type to the accept list.
    #[allow(dead_code)]
    pub fn add_content_type(&mut self, content_type: String) {
        self.accept.push(content_type);
    }

    /// Clear the accept list (accept everything again).
    #[allow(dead_code)]
    pub fn clear_content_type(&mut self) {
        self.accept.clear();
    }

    /// Set the maximum content size to download.
    #[allow(dead_code)]
    pub fn set_max_size(&mut self, size: u64) {
        self.max_size = Some(size);
    }

    /// Remove the maximum content size limit.
    #[allow(dead_code)]
    pub fn clear_max_size(&mut self) {
        self.max_size = None;
    }

    /// Create a safe filename from a url for the header/content files.
    pub fn url_to_basename(url: &str) -> String {
        let filename = if url.ends_with('/') {
            ""
        } else if let Some(has_file) = url.rsplit_once('/') {
            has_file.1
        } else {
            ""
        };

        if filename.is_empty() {
            // Getting the filename part failed.
            // Like in cases where the url is https://go.dev/dl/
            // Which becomes go-dev-dl
            let mut path = url.to_string();
            path = path.replace("https://", "");
            path = path.replace("http://", "");
            path = path.replace('/', "-");
            path = path.replace('.', "-");
            if path.ends_with('-') {
                path.pop();
            }
            return path;
        }
        filename.to_string()
    }
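
    /// A hedged configuration sketch, not part of the original API: shows how the
    /// constructor and the setters above combine. The user agent string and the
    /// limits are illustrative only.
    #[allow(dead_code)]
    fn example_configure(dir: PathBuf) -> Result<Self, Error> {
        let mut cache = Self::new(dir, Some("my-crawler/0.1"))?;
        cache.add_content_type("text/html".to_string());
        cache.add_content_type("application/json".to_string());
        cache.set_max_size(8 * 1024 * 1024); // 8 MiB cap, arbitrary for the sketch
        Ok(cache)
    }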

    /// Expire files in the cache older than the given age.
    ///
    /// Use DirEntry.modified, since it updates when a file is freshened/downloaded.
    /// DirEntry.created isn't updated when a file is rewritten.
    #[allow(dead_code)]
    pub fn expire(&self, age: Duration) -> Result<bool, Error> {
        let now = SystemTime::now();
        let mut result: bool = false;

        for file in read_dir(self.directory.as_path())? {
            let file = file?;
            if let Ok(d) = file.metadata() {
                if d.is_file() {
                    // Created isn't updated if the file is fetched. Use modified, that updates on fetch.
                    let filename = String::from(file.file_name().to_str().unwrap());
                    if filename.ends_with(HEADER_EXT) {
                        // This is a header cache file...
                        if let Ok(modify) = d.modified() {
                            if let Ok(delta) = now.duration_since(modify) {
                                // println!("expire {} = modified {}", filename, delta.as_secs());
                                if delta > age {
                                    // println!("Would delete: {} (and .content)", filename);
                                    let mut filepath = self.directory.join(filename);
                                    let r = remove_file(&filepath);
                                    if let Err(e) = r {
                                        println!("RemoveFile {:?}: {}", filepath, e);
                                    }
                                    // Also delete the content file!
                                    // Which is trickier to find now...
                                    Self::remove_from_filename(&mut filepath);
                                    // filepath.set_extension("content");
                                    let r = remove_file(&filepath);
                                    if let Err(e) = r {
                                        println!("RemoveFile {:?}: {}", filepath, e);
                                    }
                                    result = true;
                                }
                            }
                        }
                    }
                    /*
                    if let Ok(access) = d.accessed() {
                        if let Ok(delta) = now.duration_since(access) {
                            println!("accessed {:?} = accessed {}", file.file_name(), delta.as_secs());
                            if delta > age {
                                println!("Expire: {:?}", file.file_name());
                            }
                        }
                    }
                    if let Ok(created) = d.created() {
                        if let Ok(delta) = now.duration_since(created) {
                            println!("expire {:?} = created {}", file.file_name(), delta.as_secs());
                            if delta > age {
                                println!("Would delete: {:?}", file.file_name());
                                result = true;
                            }
                        }
                    }
                    */
                }
            }
        }
        Ok(result)
    }

    /// Given a url, return the filename.
    ///
    /// The filename might not exist. It is only the filename
    /// that would be used for the given url.
    pub fn filename_for_url(&self, url: &str) -> PathBuf {
        self.directory.as_path().join(Self::url_to_basename(url))
    }

    /// Given a url, return an open file.
    ///
    /// Reading from cache.
    #[allow(dead_code)]
    pub fn file(&self, url: &str) -> Option<File> {
        let base = self.filename_for_url(url);
        /*
        let base = self
            .directory
            .as_path()
            .join(Self::url_to_basename(url).unwrap());
        */
        if base.exists() {
            return Some(File::open(base).unwrap());
        }
        None
    }

    /// Return the filename from a PathBuf as a String.
    #[allow(dead_code)]
    #[must_use]
    fn pathbuf_filename(path: &PathBuf) -> String {
        path.file_name().unwrap().to_string_lossy().to_string()
    }

    /// Append to the PathBuf filename.
    ///
    /// This is different from PathBuf::set_extension,
    /// which replaces everything after the last dot.
    fn append_to_filename(path: &mut PathBuf, append: &str) {
        // Append to the filename.
        let filename = path.file_name().unwrap().to_string_lossy().to_string() + append;
        path.set_file_name(filename);
    }

    /// Remove the last extension from the filename.
    ///
    /// Given something.tar.gz.header, return something.tar.gz.
    fn remove_from_filename(path: &mut PathBuf) {
        let filename = Self::pathbuf_filename(path);
        if let Some(parts) = filename.rsplit_once('.') {
            path.set_file_name(parts.0);
        } else {
            panic!(
                "Unable to locate the trailing extension . from: {}",
                path.display()
            );
        }
    }

    /// Fetch, without using the cache.
    ///
    /// This deletes the .header cache file, which forces a fetch.
    #[allow(dead_code)]
    pub fn fetch_nocache(&self, url: &str) -> Result<Status, Error> {
        let mut base = self.filename_for_url(url);
        Self::append_to_filename(&mut base, HEADER_EXT);
        if base.exists() {
            if let Err(e) = remove_file(&base) {
                // unlink failed
                return Err(Error::IOError(e));
            }
        }
        self.fetch(url)
    }
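
    /// A hedged maintenance sketch, not part of the original API: force-refresh one
    /// URL with fetch_nocache, then expire anything older than a week. The one-week
    /// cutoff is arbitrary.
    #[allow(dead_code)]
    fn example_refresh_and_expire(&self, url: &str) -> Result<Status, Error> {
        let status = self.fetch_nocache(url)?;
        // expire() reports whether anything was removed; the sketch ignores it.
        self.expire(Duration::from_secs(7 * 24 * 60 * 60))?;
        Ok(status)
    }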

    // I'm not sure about using Result here...
    // It would allow for ? usage.
    /// Fetch the URL from the web.
    ///
    /// This returns Status, which tells you whether the body was freshly
    /// downloaded (Fetched) or served from the cached copy (Cached).
    #[must_use]
    pub fn fetch(&self, url: &str) -> Result<Status, Error> {
        let base = self.filename_for_url(url);
        /*
        let base = self
            .directory
            .as_path()
            .join(Self::url_to_basename(url).unwrap());
        */
        let mut builder = self.client.get(url); // Don't send just yet!

        // Set some headers to see if the page content has changed.
        let mut header_file = base.clone();
        Self::append_to_filename(&mut header_file, HEADER_EXT);

        if header_file.exists() {
            // Ok! We have existing information. Retrieve it.
            match load_headermap(header_file.to_str().unwrap()) {
                Ok(old_header) => {
                    // Look for: ETag, Last-Modified
                    if let Some(lastmod) = old_header.get("Last-Modified") {
                        builder = builder.header("If-Modified-Since", lastmod);
                    } else if let Some(date) = old_header.get("Date") {
                        // Keep trying...
                        builder = builder.header("If-Modified-Since", date);
                    }
                    if let Some(etag) = old_header.get("etag") {
                        builder = builder.header("If-None-Match", etag);
                    }
                }
                Err(e) => {
                    return Err(Error::IOError(e));
                }
            }
        }

        match builder.send() {
            Ok(mut result) => {
                if result.status() == 304 {
                    // Cache hit!
                    return Ok(Status::Cached(base));
                }

                if result.status() == 200 {
                    // Success!

                    // When caching fails:
                    // If content_length (from the previous fetch) matches the current one,
                    // could we assume it hasn't changed, and just use the cache?
                    // Or would that be a big assumption?

                    // Only check the content_length size if we have been
                    // given a max_size.
                    if let Some(max_size) = self.max_size {
                        if let Some(len) = result.content_length() {
                            if len > max_size {
                                // Is there a way to abort this safely? Apparently yes! :D
                                // let byte = Byte::from_u64(len);
                                // let adjusted_byte = byte.get_appropriate_unit(UnitType::Binary);
                                // println!("Too Big! {adjusted_byte:.2} {}", url);
                                return Err(Error::TooBig(len));
                            }
                        }
                    }

                    // Only check acceptable content-types if given.
                    if !self.accept.is_empty() {
                        if let Some(content_type) = result.headers().get("content-type") {
                            // Check to see if this is accepted content.
                            let mut ct = content_type.to_str().unwrap();
                            let possible = ct.split_once(';');
                            if let Some((ct_part, _)) = possible {
                                ct = ct_part;
                            }
                            if !self.accept.contains(&ct.to_string()) {
                                // println!("Unacceptable content-type {} {}", ct, url);
                                return Err(Error::Unacceptable(ct.to_string()));
                            }
                        }
                    }

                    if let Err(e) = save_headermap(header_file.to_str().unwrap(), url, result.headers()) {
                        return Err(Error::IOError(e));
                    }

                    match File::create(base.to_str().unwrap()) {
                        Ok(mut fp) => match result.copy_to(&mut fp) {
                            Ok(_) => {}
                            Err(e) => {
                                return Err(Error::ReqwestError(e));
                            }
                        },
                        Err(e) => {
                            return Err(Error::IOError(e));
                        }
                    }
                    // result.copy_to(&mut fp)?;

                    /*
                    // async
                    while let Ok(Some(chunk)) = result.chunk().await {
                        let _ = fp.write(&chunk);
                    }
                    */

                    Ok(Status::Fetched(base))
                } else {
                    // Status error
                    // println!("Error {} {}", result.status(), url);
                    Err(Error::HttpErrorStatus(u16::from(result.status())))
                }
            }
            Err(e) => Err(Error::ReqwestError(e)),
        }
    }
}

/*
Some useful httpbin endpoints:

https://httpbin.org/anything
    /headers
    /ip
    /user-agent
    /status/404
    /status/200
    /cache/value    for cache-control
    /cache          (if if-modified-since or if-none-match are present, returns 304)
    /etag/value     for etag (if-none-match or if-match)
    /uuid
    /brotli
    /deflate
    /gzip
    ^ I wonder what happens if I request one that isn't enabled in reqwest?
*/
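
// A hedged end-to-end sketch, not part of the original module: fetch a page and
// read the cached body back from disk. The directory and URL are whatever the
// caller supplies; nothing here is hard-coded.
#[allow(dead_code)]
fn example_fetch_and_read(dir: PathBuf, url: &str) -> Result<String, Box<dyn Errorr>> {
    let cache = Cache::new(dir, None)?;
    let status = cache.fetch(url)?;
    // Both Status::Fetched and Status::Cached carry the on-disk path to the body.
    let body = std::fs::read_to_string(status.download_path())?;
    Ok(body)
}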

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;
    use testdir::testdir;

    #[test]
    fn relative_test() {
        let rel_abs: HashMap<(&str, &str), &str> = HashMap::from([
            (
                ("http://meow.org/rabbit", "/llama/index.html"),
                "http://meow.org/llama/index.html",
            ),
            (
                ("https://example.com/dir/index.html", "about.html"),
                "https://example.com/dir/about.html",
            ),
            (
                ("https://example.com/dir/index.html", "../and/about.html"),
                "https://example.com/and/about.html",
            ),
            (
                (
                    "https://here.com/dir/index.html",
                    "http://there.com/about.html",
                ),
                "http://there.com/about.html",
            ),
        ]);

        for (base, url) in rel_abs {
            if let Ok(abs) = relative_to_absolute(base.0, base.1) {
                assert_eq!(abs, url, "Base {}, Rel {} => {}", base.0, base.1, url);
            } else {
                panic!("Failed {} + {} => {}", base.0, base.1, url);
            }
        }
    }

    #[test]
    fn url_to_filename_test() {
        let mut dir = testdir!();
        dir.push("cache");
        let temp = dir.clone();
        let cache = Cache::new(dir, None).unwrap();

        // url_to_basename
        let url_base: HashMap<&str, &str> = HashMap::from([
            ("https://go.dev/dl/go1.23.45.tar.gz", "go1.23.45.tar.gz"),
            ("https://go.dev/dl", "dl"),
            ("https://go.dev/dl/", "go-dev-dl"),
        ]);

        for (url, base) in url_base {
            // Verify url_to_basename.
            let basename = Cache::url_to_basename(url);
            assert_eq!(base, basename, "{} -> {}", url, base);

            // Verify filename_for_url.
            let path = cache.filename_for_url(url);
            let mut newpath = temp.clone();
            newpath.push(base);
            assert_eq!(path.as_os_str(), newpath.as_os_str(), "{} -> {}", url, base);
        }

        for filename in vec!["go1.23.45.tar.gz", "test.html"] {
            let newname = String::from(filename) + HEADER_EXT;
            let mut newpath = temp.clone();
            newpath.set_file_name(filename);
            Cache::append_to_filename(&mut newpath, HEADER_EXT);
            assert_eq!(
                &newpath.file_name().unwrap().to_string_lossy().to_string(),
                &newname,
                "{} {}",
                filename,
                HEADER_EXT
            );

            // Test to make sure this removes HEADER_EXT from the filename.
            Cache::remove_from_filename(&mut newpath);
            assert_eq!(
                &newpath.file_name().unwrap().to_string_lossy().to_string(),
                filename,
                "{}",
                filename
            )
        }
    }

    #[test]
    #[cfg(not(feature = "local-httpbin"))]
    fn cache_fetch() {
        let mut dir = testdir!();
        dir.push("cache");
        // Make a copy of the cache directory PathBuf for verifying paths.
        let mut t = dir.clone();
        let cache = Cache::new(dir, None).unwrap();

        let r = cache.fetch("https://httpbin.org/anything");
        t.push("anything");

        if let Ok(r) = r {
            if let Status::Fetched(f) = r {
                assert!(f.exists(), "Cache file exists.");
                assert_eq!(f, t, "Cache path is what we were expecting.");

                let mut header_file = t.clone();
                Cache::append_to_filename(&mut header_file, HEADER_EXT);
                assert!(header_file.exists(), "Cache header file exists.");

                t.pop();
                t.push("anything.header");
                assert_eq!(header_file, t, "Cache header path is what we expected.");
            } else {
                panic!("Cache Status is not Status::Fetched, is: {:?}", r);
            }
        } else {
            panic!("cache.fetch: {:?}", r);
        }
    }

    /*
    Add to Cargo.toml:

        [features]
        local-httpbin = []

    Use:

        #[test]
        #[cfg(feature = "local-httpbin")]

    And then:

        cargo test -F local-httpbin -- --show-output

    This runs the local httpbin tests.
    */
    #[test]
    #[cfg(feature = "local-httpbin")]
    fn call_local() {
        let mut dir = testdir!();
        dir.push("cache");
        // Make a copy of the cache directory PathBuf for verifying paths.
        let mut t = dir.clone();
        let cache = Cache::new(dir, None).unwrap();

        let teapot_url = "http://127.0.0.1/status/418";
        let r = cache.fetch(teapot_url);
        if let Err(e) = r {
            if let Error::HttpErrorStatus(code) = e {
                assert_eq!(code, 418);
            } else {
                panic!("Not an ErrorStatus");
            }
        } else {
            panic!("Expected Error::HttpErrorStatus, got: {r:?}");
        }
        // println!("{:?}", r);

        let r = cache.fetch("http://127.0.0.1:1024");
        assert!(r.is_err(), "Confirm connection error");

        /*
        I disabled brotli in the Client builder. I get an error below about
        invalid UTF-8. The httpbin server isn't smart enough to see I don't
        support it, and sends it anyway. :(
        */
        /*
        let brot_url = "http://127.0.0.1/brotli";
        let r = cache.fetch(brot_url);
        println!("Brotli: {:?}", r);
        if let Status::Fetched(path) = r {
            let data = std::fs::read_to_string(path).unwrap();
            println!("DATA:\n{}", data);
        }
        */
    }

    /*
    These tests require a running local httpbin image.

    ```
    services:
      httpbin:
        image: kennethreitz/httpbin
        ports:
          - "80:80"
    ```
    */
    #[test]
    #[cfg(feature = "local-httpbin")]
    fn cache_local() {
        let mut dir = testdir!();
        dir.push("cache");
        // Make a copy of the cache directory PathBuf for verifying paths.
        let mut t = dir.clone();
        let cache = Cache::new(dir, None).unwrap();

        let etag_url = "http://127.0.0.1/etag/meow";
        let r = cache.fetch(etag_url);
        if let Ok(r) = r {
            match r {
                Status::Fetched(_) => {}
                _ => {
                    panic!("Expected Status::Fetched on 1st request.");
                }
            }
        } else {
            panic!("Unexpected error: {r:?}");
        }

        // 2nd call, the etag header is set.
        let r2 = cache.fetch(etag_url);
        if let Ok(r2) = r2 {
            match r2 {
                Status::Cached(_) => {}
                _ => {
                    panic!("Expected Status::Cached on 2nd request.");
                }
            }
        } else {
            panic!("Unexpected error: {r2:?}");
        }
        // println!("{:?}\n{:?}", r, r2);
    }
}
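
// A hedged sketch of an extra, offline test, not part of the original suite: it
// only exercises Status::download_path and filename_for_url, so no network or
// local httpbin is needed. The URL below is made up.
#[cfg(test)]
mod offline_sketch {
    use super::*;
    use testdir::testdir;

    #[test]
    fn download_path_matches_filename_for_url() {
        let mut dir = testdir!();
        dir.push("cache");
        let cache = Cache::new(dir, None).unwrap();

        let url = "https://example.com/files/data.json";
        let expected = cache.filename_for_url(url);

        // Both Status variants hand back the path they were built with.
        let fetched = Status::Fetched(expected.clone());
        let cached = Status::Cached(expected.clone());
        assert_eq!(fetched.download_path(), &expected);
        assert_eq!(cached.download_path(), &expected);
    }
}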