From 428bed8a4ade67dd309b9d2839f5a29b56bb314c Mon Sep 17 00:00:00 2001 From: Stefano Pigozzi Date: Thu, 14 Nov 2024 01:39:33 +0100 Subject: [PATCH] `hostmeta`: Move things to different submodules --- acrate-hostmeta/src/descriptor.rs | 475 ++++++++++++++++++++++++++++++ acrate-hostmeta/src/link.rs | 64 ++++ acrate-hostmeta/src/property.rs | 17 ++ acrate-hostmeta/src/utils.rs | 8 + 4 files changed, 564 insertions(+) create mode 100644 acrate-hostmeta/src/descriptor.rs create mode 100644 acrate-hostmeta/src/link.rs create mode 100644 acrate-hostmeta/src/property.rs create mode 100644 acrate-hostmeta/src/utils.rs diff --git a/acrate-hostmeta/src/descriptor.rs b/acrate-hostmeta/src/descriptor.rs new file mode 100644 index 0000000..9dd6157 --- /dev/null +++ b/acrate-hostmeta/src/descriptor.rs @@ -0,0 +1,475 @@ +use serde::Deserialize; +use thiserror::Error; +use crate::link::ResourceDescriptorLink; +use crate::property::ResourceDescriptorProperty; + +/// A resource descriptor object. +/// +/// # Specification +/// +/// - +/// - +/// +#[derive(Debug, Clone, Deserialize)] +pub struct ResourceDescriptor { + /// The resource this document refers to. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "Subject")] + pub subject: Option, + + /// Other names the resource described by this document can be referred to. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "Alias")] + pub aliases: Option>, + + /// Additional information about the resource described by this document. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "Property")] + pub properties: Option>, + + /// Links established between the [`Self::subject`] and other resources. + /// + /// # Specification + /// + /// - + /// - + /// + #[serde(alias = "Link")] + pub links: Option>, +} + +impl ResourceDescriptor { + /// Get a JRD (JSON [`ResourceDescriptor`]). 
+ /// + /// # Notes + /// + /// This follows redirects until the redirect chain is 10 hops; see [`reqwest::redirect`] for more info. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// use acrate_hostmeta::ResourceDescriptor; + /// + /// let client = reqwest::Client::new(); + /// let url: reqwest::Url = "https://junimo.party/.well-known/nodeinfo".parse() + /// .expect("URL to be valid"); + /// + /// let rd = ResourceDescriptor::get_jrd(&client, url) + /// .await + /// .expect("JRD to be processed correctly"); + /// # }) + /// ``` + /// + pub async fn get_jrd(client: &reqwest::Client, url: reqwest::Url) -> Result { + use GetJRDError::*; + + log::debug!("Getting JRD document at: {url}"); + + log::trace!("Building request..."); + let request = { + log::trace!("Creating new request..."); + let mut request = reqwest::Request::new(reqwest::Method::GET, url); + + log::trace!("Setting request headers..."); + let headers = request.headers_mut(); + + log::trace!("Setting `Accept: application/json`..."); + let _ = headers.insert(reqwest::header::ACCEPT, reqwest::header::HeaderValue::from_static("application/json")); + + request + }; + + log::trace!("Sending request..."); + let response = client.execute(request) + .await + .map_err(Request)?; + + log::trace!("Checking `Content-Type` of the response..."); + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .ok_or(ContentTypeMissing)?; + + log::trace!("Extracting MIME type from the `Content-Type` header..."); + let mime_type = crate::utils::extract_mime_from_content_type(content_type) + .ok_or(ContentTypeInvalid)?; + + log::trace!("Ensuring MIME type is acceptable for JRD parsing..."); + if !(mime_type == "application/json" || mime_type == "application/jrd+json") { + log::error!("MIME type `{mime_type}` is not acceptable for JRD parsing."); + return Err(ContentTypeInvalid) + } + + log::trace!("Attempting to parse response as JSON..."); + let data = response.json::() + .await + .map_err(Parse)?; 
+ + Ok(data) + } + + /// Get a XRD (Extensible [`ResourceDescriptor`]). + /// + /// # Notes + /// + /// This follows redirects until the redirect chain is 10 hops; see [`reqwest::redirect`] for more info. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// use acrate_hostmeta::ResourceDescriptor; + /// + /// let client = reqwest::Client::new(); + /// let url: reqwest::Url = "https://junimo.party/.well-known/host-meta".parse() + /// .expect("URL to be valid"); + /// + /// let rd = ResourceDescriptor::get_xrd(&client, url) + /// .await + /// .expect("XRD to be processed correctly"); + /// # }) + /// ``` + /// + pub async fn get_xrd(client: &reqwest::Client, url: reqwest::Url) -> Result { + use GetXRDError::*; + + log::debug!("Getting host-meta XRD document at: {url}"); + + log::trace!("Building request..."); + let request = { + log::trace!("Creating new request..."); + let mut request = reqwest::Request::new(reqwest::Method::GET, url); + + log::trace!("Setting request headers..."); + let headers = request.headers_mut(); + + log::trace!("Setting `Accept: application/xrd+xml`..."); + let _ = headers.insert(reqwest::header::ACCEPT, reqwest::header::HeaderValue::from_static("application/xrd+xml")); + + request + }; + + log::trace!("Sending request..."); + let response = client.execute(request) + .await + .map_err(Request)?; + + log::trace!("Checking `Content-Type` of the response..."); + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .ok_or(ContentTypeMissing)?; + + log::trace!("Extracting MIME type from the `Content-Type` header..."); + let mime_type = crate::utils::extract_mime_from_content_type(content_type) + .ok_or(ContentTypeInvalid)?; + + log::trace!("Ensuring MIME type is acceptable for XRD parsing..."); + if mime_type != "application/xrd+xml" { + log::error!("MIME type `{mime_type}` is not acceptable for XRD parsing."); + return Err(ContentTypeInvalid) + } + + log::trace!("Attempting to parse response as text..."); + let 
data = response.text() + .await + .map_err(Decode)?; + + log::trace!("Parsing response as XML..."); + let data = quick_xml::de::from_str::(&data) + .map_err(Parse)?; + + Ok(data) + } + + /// Attempt to discover a [`ResourceDescriptor`] at the given URL in various ways. + /// + /// In order, this method attempts: + /// + /// 1. HTTPS [XRD](Self::get_xrd) + /// 2. HTTPS [JRD](Self::get_jrd) + /// 3. HTTPS [JRD](Self::get_jrd) with .json path extension + /// 4. HTTP [XRD](Self::get_xrd) + /// 5. HTTP [JRD](Self::get_jrd) + /// 6. HTTP [JRD](Self::get_jrd) with .json path extension + /// + /// # Notes + /// + /// This follows redirects until the redirect chain is 10 hops; see [`reqwest::redirect`] for more info. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// use acrate_hostmeta::ResourceDescriptor; + /// + /// let client = reqwest::Client::new(); + /// let url: reqwest::Url = "https://junimo.party/.well-known/host-meta".parse() + /// .expect("URL to be valid"); + /// + /// let rd = ResourceDescriptor::discover(&client, url) + /// .await + /// .expect("resource descriptor to be discovered correctly"); + /// # }) + /// ``` + /// + pub async fn discover(client: &reqwest::Client, mut url: reqwest::Url) -> Result { + use ResourceDescriptorDiscoveryError::*; + + log::debug!("Discovering resource descriptor document at: {url}"); + + log::trace!("Unsetting URL query..."); + url.set_query(None); + + log::trace!("Unsetting URL fragment..."); + url.set_fragment(None); + + log::trace!("Setting URL scheme to HTTPS..."); + url.set_scheme("https") + .map_err(UrlManipulation)?; + + log::trace!("Cloning URL for HTTPS XRD retrieval..."); + let https_xrd_url = url.clone(); + + log::trace!("Attempting HTTPS XRD retrieval..."); + let https_xrd = match Self::get_xrd(client, https_xrd_url).await { + Ok(data) => { + log::trace!("HTTPS XRD retrieval was successful, returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTPS XRD retrieval 
failed."); + err + } + }; + + log::trace!("Cloning URL for HTTPS JRD retrieval..."); + let https_jrd_url = url.clone(); + + log::trace!("Attempting HTTPS JRD retrieval..."); + let https_jrd = match Self::get_jrd(client, https_jrd_url).await { + Ok(data) => { + log::trace!("HTTPS JRD retrieval was successful, returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTPS JRD retrieval failed."); + err + } + }; + + log::trace!("Cloning URL for HTTPS JRD .json retrieval..."); + let mut https_jrdj_url = url.clone(); + + log::trace!("Altering URL path for HTTPS JRD .json retrieval..."); + https_jrdj_url.set_path( + &format!("{}.json", https_jrdj_url.path()) + ); + + log::trace!("Attempting HTTPS JRD .json retrieval..."); + let https_jrdj = match Self::get_jrd(client, https_jrdj_url).await { + Ok(data) => { + log::trace!("HTTPS JRD .json retrieval was successful, returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTPS JRD .json retrieval failed."); + err + } + }; + + log::trace!("Setting URL scheme to HTTP..."); + url.set_scheme("http") + .map_err(UrlManipulation)?; + + log::trace!("Cloning URL for HTTP XRD retrieval..."); + let http_xrd_url = url.clone(); + + log::trace!("Attempting HTTP XRD retrieval..."); + let http_xrd = match Self::get_xrd(client, http_xrd_url).await { + Ok(data) => { + log::trace!("HTTP XRD retrieval was successful, returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTP XRD retrieval failed."); + err + } + }; + + log::trace!("Cloning URL for HTTP JRD retrieval..."); + let http_jrd_url = url.clone(); + + log::trace!("Attempting HTTP JRD retrieval..."); + let http_jrd = match Self::get_jrd(client, http_jrd_url).await { + Ok(data) => { + log::trace!("HTTP JRD retrieval was successful, returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTP JRD retrieval failed."); + err + } + }; + + log::trace!("Cloning URL for HTTP JRD .json retrieval..."); + let mut http_jrdj_url = url.clone(); + + 
log::trace!("Altering URL path for HTTP JRD .json retrieval..."); + http_jrdj_url.set_path( + &format!("{}.json", http_jrdj_url.path()) + ); + + log::trace!("Attempting HTTP JRD .json retrieval..."); + let http_jrdj = match Self::get_jrd(client, http_jrdj_url).await { + Ok(data) => { + log::trace!("HTTP JRD .json retrieval was successful, returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTP JRD .json retrieval failed."); + err + } + }; + + Err( + ResourceDescriptorDiscoveryError::Fetch( + ResourceDescriptorDiscoveryFailures { + https_xrd, + https_jrd, + https_jrdj, + http_xrd, + http_jrd, + http_jrdj, + } + ) + ) + } + + /// Well-known path for host-meta documents. + /// + /// # Specification + /// + /// - + /// + pub const WELLKNOWN_HOSTMETA_PATH: &str = "/.well-known/host-meta"; + + /// Attempt to discover a host-meta document at the given base URL. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// use acrate_hostmeta::ResourceDescriptor; + /// + /// let client = reqwest::Client::new(); + /// let base: reqwest::Url = "https://junimo.party".parse() + /// .expect("URL to be valid"); + /// + /// let rd = ResourceDescriptor::discover_hostmeta(&client, base) + /// .await + /// .expect("host-meta to be discovered correctly"); + /// # }) + /// ``` + /// + pub async fn discover_hostmeta(client: &reqwest::Client, mut base: reqwest::Url) -> Result { + base.set_path(Self::WELLKNOWN_HOSTMETA_PATH); + + Self::discover(client, base) + .await + } +} + +/// Error occurred during [`ResourceDescriptor::discover`]. +#[derive(Debug, Error)] +pub enum ResourceDescriptorDiscoveryError { + /// Manipulation of the provided base [`reqwest::Url`] failed. + /// + /// See [`reqwest::Url::set_scheme`] for possible causes. + #[error("manipulation of the provided URL failed")] + UrlManipulation(()), + + /// All attempts of fetching a resource descriptor document failed. 
+ #[error("fetching the resource descriptor document failed")] + Fetch(ResourceDescriptorDiscoveryFailures), +} + +/// Request errors occurred during [`ResourceDescriptor::discover`]. +#[derive(Debug, Error)] +#[error("all attempts of fetching the resource descriptor document failed")] +pub struct ResourceDescriptorDiscoveryFailures { + /// HTTPS XRD retrieval. + pub https_xrd: GetXRDError, + + /// HTTPS JRD retrieval. + pub https_jrd: GetJRDError, + + /// HTTPS JRD with .json extension retrieval. + pub https_jrdj: GetJRDError, + + /// HTTP XRD retrieval. + pub http_xrd: GetXRDError, + + /// HTTP JRD retrieval. + pub http_jrd: GetJRDError, + + /// HTTP JRD with .json extension retrieval. + pub http_jrdj: GetJRDError, +} + +/// Error occurred during [`ResourceDescriptor::get_xrd`]. +#[derive(Debug, Error)] +pub enum GetXRDError { + /// The HTTP request failed. + #[error("the HTTP request failed")] + Request(reqwest::Error), + + /// The `Content-Type` header of the response is missing. + #[error("the Content-Type header of the response is missing")] + ContentTypeMissing, + + /// The `Content-Type` header of the response is invalid. + #[error("the Content-Type header of the response is invalid")] + ContentTypeInvalid, + + /// The document failed to be decoded as text. + #[error("the document failed to be decoded as text")] + Decode(reqwest::Error), + + /// The document failed to be parsed as XML by [`quick_xml`]. + #[error("the document failed to be parsed as XML")] + Parse(quick_xml::DeError), +} + +/// Error occurred during [`ResourceDescriptor::get_jrd`]. +#[derive(Debug, Error)] +pub enum GetJRDError { + /// The HTTP request failed. + #[error("the HTTP request failed")] + Request(reqwest::Error), + + /// The `Content-Type` header of the response is missing. + #[error("the Content-Type header of the response is missing")] + ContentTypeMissing, + + /// The `Content-Type` header of the response is invalid. 
+ #[error("the Content-Type header of the response is invalid")] + ContentTypeInvalid, + + /// The document failed to be parsed as JSON by [`reqwest`]. + #[error("the document failed to be parsed as JSON")] + Parse(reqwest::Error), +} diff --git a/acrate-hostmeta/src/link.rs b/acrate-hostmeta/src/link.rs new file mode 100644 index 0000000..2a4864d --- /dev/null +++ b/acrate-hostmeta/src/link.rs @@ -0,0 +1,64 @@ +use serde::Deserialize; +use std::collections::HashMap; +use crate::property::ResourceDescriptorProperty; + +/// A link element, which puts the subject resource in relation with another. +/// +/// # Specification +/// +/// - +/// +#[derive(Debug, Clone, Deserialize)] +pub struct ResourceDescriptorLink { + /// The kind of relation established by the subject with the attached resource. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "@rel")] + pub rel: String, + + /// The media type of the resource put in relation. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "@type")] + pub r#type: Option, + + /// URI to the resource put in relation. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "@href")] + pub href: Option, + + /// Titles of the resource put in relation in various languages. + /// + /// # Specification + /// + /// - + /// + pub titles: Option>>, + + /// Additional information about the resource put in relation. + /// + /// # Specification + /// + /// - + /// + pub properties: Option>, + + /// Template to fill to get the URL to resource-specific information. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "@template")] + pub template: Option, +} diff --git a/acrate-hostmeta/src/property.rs b/acrate-hostmeta/src/property.rs new file mode 100644 index 0000000..00e7fe7 --- /dev/null +++ b/acrate-hostmeta/src/property.rs @@ -0,0 +1,17 @@ +use serde::Deserialize; + +/// A property element, which describes a certain aspect of the subject resource. 
+/// +/// # Specification +/// +/// - +/// +#[derive(Debug, Clone, Deserialize)] +pub struct ResourceDescriptorProperty { + /// The property identifier, or type. + #[serde(alias = "@type")] + pub r#type: String, + + /// The property value. + pub value: Option, +} diff --git a/acrate-hostmeta/src/utils.rs b/acrate-hostmeta/src/utils.rs new file mode 100644 index 0000000..fd3022c --- /dev/null +++ b/acrate-hostmeta/src/utils.rs @@ -0,0 +1,8 @@ +/// Extract the lowercased MIME type, stripped of any parameters, from the value of the `Content-Type` header. +pub fn extract_mime_from_content_type(value: &reqwest::header::HeaderValue) -> Option { + let value = value.to_str().ok()?; + // Parameters follow `;` with optional whitespace on either side, and media types are case-insensitive. + let mime = value.split_once(';').map_or(value, |(mime, _)| mime); + + Some(mime.trim().to_ascii_lowercase()) +}