diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..d9a86a3 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,21 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = tab +insert_final_newline = true +tab_width = 4 + +[*.yml] +indent_size = 2 +indent_style = space + +[*.md] +indent_size = 3 +indent_style = space + +[*.rst] +indent_size = 3 +indent_style = space diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..e69de29 diff --git a/Cargo.toml b/Cargo.toml index defdbd3..86f0e07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,3 +1,3 @@ [workspace] resolver = "2" -members = ["acrate-core", "acrate-nodeinfo"] +members = ["acrate-core", "acrate-hostmeta", "acrate-nodeinfo"] diff --git a/acrate-core/Cargo.toml b/acrate-core/Cargo.toml index 2688ecd..6823787 100644 --- a/acrate-core/Cargo.toml +++ b/acrate-core/Cargo.toml @@ -6,3 +6,8 @@ edition = "2021" [dependencies] diesel = "2.2.4" diesel_migrations = "2.2.0" +acrate-hostmeta = { path = "../acrate-hostmeta" } +acrate-nodeinfo = { path = "../acrate-nodeinfo" } + +[lints.clippy] +tabs-in-doc-comments = "allow" diff --git a/acrate-core/migrations/2024-11-09-084403_Add nodeinfo table/down.sql b/acrate-core/migrations/2024-11-09-084403_Add nodeinfo table/down.sql new file mode 100644 index 0000000..2111f46 --- /dev/null +++ b/acrate-core/migrations/2024-11-09-084403_Add nodeinfo table/down.sql @@ -0,0 +1 @@ +DROP TABLE nodeinfo; \ No newline at end of file diff --git a/acrate-core/migrations/2024-11-09-084403_Add nodeinfo table/up.sql b/acrate-core/migrations/2024-11-09-084403_Add nodeinfo table/up.sql new file mode 100644 index 0000000..c21c18f --- /dev/null +++ b/acrate-core/migrations/2024-11-09-084403_Add nodeinfo table/up.sql @@ -0,0 +1,9 @@ +CREATE TABLE nodeinfo ( + nodeinfo_schema VARCHAR NOT NULL, + nodeinfo_href VARCHAR NOT NULL, + nodeinfo_data JSON NOT NULL, + + last_updated TIMESTAMP NOT NULL, + + PRIMARY KEY(nodeinfo_href) +); 
diff --git a/acrate-core/src/lib.rs b/acrate-core/src/lib.rs index b93cf3f..dae5ee7 100644 --- a/acrate-core/src/lib.rs +++ b/acrate-core/src/lib.rs @@ -1,14 +1,4 @@ -pub fn add(left: u64, right: u64) -> u64 { - left + right -} +//! Core crate of the `acrate` project. -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} +pub use acrate_nodeinfo as nodeinfo; +pub use acrate_hostmeta as hostmeta; diff --git a/acrate-hostmeta/Cargo.toml b/acrate-hostmeta/Cargo.toml new file mode 100644 index 0000000..bda02e5 --- /dev/null +++ b/acrate-hostmeta/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "acrate-hostmeta" +version = "0.1.0" +edition = "2021" + +[dependencies] +log = "0.4.22" +quick-xml = { version = "0.37.0", features = ["overlapped-lists", "serialize"] } +reqwest = { version = "0.12.9", features = ["json", "stream"] } +serde = { version = "1.0.214", features = ["derive"] } +serde_json = "1.0.132" + +[dev-dependencies] +pretty_env_logger = "0.5.0" +tokio = { version = "1.41.1", features = ["macros", "rt-multi-thread"] } +tokio-test = "0.4.4" + +[lints.clippy] +tabs-in-doc-comments = "allow" diff --git a/acrate-hostmeta/src/lib.rs b/acrate-hostmeta/src/lib.rs new file mode 100644 index 0000000..57ecd2b --- /dev/null +++ b/acrate-hostmeta/src/lib.rs @@ -0,0 +1,548 @@ +//! Resource descriptor deserializer. +//! +//! # Specification +//! +//! - +//! - + +use std::collections::HashMap; + +use serde::Deserialize; + +/// A resource descriptor object. +/// +/// # Specification +/// +/// - +/// - +/// +#[derive(Debug, Clone, Deserialize)] +pub struct ResourceDescriptor { + /// The resource this document refers to. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "Subject")] + pub subject: Option, + + /// Other names the resource described by this document can be referred to by. 
+ /// + /// # Specification + /// + /// - + /// + #[serde(alias = "Alias")] + pub aliases: Option>, + + /// Additional information about the resource described by this document. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "Property")] + pub properties: Option>, + + /// Links established between the [`Self::subject`] and other resources. + /// + /// # Specification + /// + /// - + /// - + /// + #[serde(alias = "Link")] + pub links: Option>, +} + +/// A link element, which puts the subject resource in relation with another. +/// +/// # Specification +/// +/// - +/// +#[derive(Debug, Clone, Deserialize)] +pub struct ResourceDescriptorLink { + /// The kind of relation established by the subject with the attached resource. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "@rel")] + pub rel: String, + + /// The media type of the resource put in relation. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "@type")] + pub r#type: Option, + + /// URI to the resource put in relation. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "@href")] + pub href: Option, + + /// Titles of the resource put in relation in various languages. + /// + /// # Specification + /// + /// - + /// + pub titles: Option>>, + + /// Additional information about the resource put in relation. + /// + /// # Specification + /// + /// - + /// + pub properties: Option>, + + /// Template to fill to get the URL to resource-specific information. + /// + /// # Specification + /// + /// - + /// + #[serde(alias = "@template")] + pub template: Option, +} + +/// A property element, which describes a certain aspect of the subject resource. +/// +/// # Specification +/// +/// - +/// +#[derive(Debug, Clone, Deserialize)] +pub struct ResourceDescriptorProperty { + /// The property identifier, or type. + #[serde(alias = "@type")] + pub r#type: String, + + /// The property value. 
+ pub value: Option, +} + +impl ResourceDescriptor { + /// Get a JRD (JSON [`ResourceDescriptor`]). + /// + /// # Notes + /// + /// This follows redirects until the redirect chain is 10 hops; see [`reqwest::redirect`] for more info. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// use acrate_hostmeta::ResourceDescriptor; + /// + /// let client = reqwest::Client::new(); + /// let url: reqwest::Url = "https://junimo.party/.well-known/nodeinfo".parse() + /// .expect("URL to be valid"); + /// + /// let rd = ResourceDescriptor::get_jrd(&client, url) + /// .await + /// .expect("JRD to be processed correctly"); + /// # }) + /// ``` + /// + pub async fn get_jrd(client: &reqwest::Client, url: reqwest::Url) -> Result { + use GetJRDError::*; + + log::debug!("Getting JRD document at: {url}"); + + log::trace!("Building request..."); + let request = { + log::trace!("Creating new request..."); + let mut request = reqwest::Request::new(reqwest::Method::GET, url); + + log::trace!("Setting request headers..."); + let headers = request.headers_mut(); + + log::trace!("Setting `Accept: application/json`..."); + let _ = headers.insert(reqwest::header::ACCEPT, "application/json".parse().unwrap()); + + request + }; + + log::trace!("Sending request..."); + let response = client.execute(request) + .await + .map_err(Request)?; + + log::trace!("Checking `Content-Type` of the response..."); + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .ok_or(ContentTypeMissing)?; + + log::trace!("Extracting MIME type from the `Content-Type` header..."); + let mime_type = extract_mime_from_content_type(content_type) + .ok_or(ContentTypeInvalid)?; + + log::trace!("Ensuring MIME type is acceptable for JRD parsing..."); + if !(mime_type == "application/json" || mime_type == "application/jrd+json") { + log::error!("MIME type `{mime_type}` is not acceptable for JRD parsing."); + return Err(ContentTypeInvalid) + } + + log::trace!("Attempting to 
parse response as JSON..."); + let data = response.json::() + .await + .map_err(Parse)?; + + Ok(data) + } + + /// Get a XRD (Extensible [`ResourceDescriptor`]). + /// + /// # Notes + /// + /// This follows redirects until the redirect chain is 10 hops; see [`reqwest::redirect`] for more info. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// use acrate_hostmeta::ResourceDescriptor; + /// + /// let client = reqwest::Client::new(); + /// let url: reqwest::Url = "https://junimo.party/.well-known/host-meta".parse() + /// .expect("URL to be valid"); + /// + /// let rd = ResourceDescriptor::get_xrd(&client, url) + /// .await + /// .expect("XRD to be processed correctly"); + /// # }) + /// ``` + /// + pub async fn get_xrd(client: &reqwest::Client, url: reqwest::Url) -> Result { + use GetXRDError::*; + + log::debug!("Getting host-meta XRD document at: {url}"); + + log::trace!("Building request..."); + let request = { + log::trace!("Creating new request..."); + let mut request = reqwest::Request::new(reqwest::Method::GET, url); + + log::trace!("Setting request headers..."); + let headers = request.headers_mut(); + + log::trace!("Setting `Accept: application/xrd+xml`..."); + let _ = headers.insert(reqwest::header::ACCEPT, "application/xrd+xml".parse().unwrap()); + + request + }; + + log::trace!("Sending request..."); + let response = client.execute(request) + .await + .map_err(Request)?; + + log::trace!("Checking `Content-Type` of the response..."); + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .ok_or(ContentTypeMissing)?; + + log::trace!("Extracting MIME type from the `Content-Type` header..."); + let mime_type = extract_mime_from_content_type(content_type) + .ok_or(ContentTypeInvalid)?; + + log::trace!("Ensuring MIME type is acceptable for XRD parsing..."); + if mime_type != "application/xrd+xml" { + log::error!("MIME type `{mime_type}` is not acceptable for XRD parsing."); + return 
Err(ContentTypeInvalid) + } + + log::trace!("Attempting to parse response as text..."); + let data = response.text() + .await + .map_err(Decode)?; + + log::trace!("Parsing response as XML..."); + let data = quick_xml::de::from_str::(&data) + .map_err(Parse)?; + + Ok(data) + } + + /// Attempt to discover a [`ResourceDescriptor`] at the given URL in various ways. + /// + /// In order, this method attempts: + /// + /// 1. HTTPS [XRD](Self::get_xrd) + /// 2. HTTPS [JRD](Self::get_jrd) + /// 3. HTTPS [JRD](Self::get_jrd) with .json path extension + /// 4. HTTP [XRD](Self::get_xrd) + /// 5. HTTP [JRD](Self::get_jrd) + /// 6. HTTP [JRD](Self::get_jrd) with .json path extension + /// + /// # Notes + /// + /// This follows redirects until the redirect chain is 10 hops; see [`reqwest::redirect`] for more info. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// use acrate_hostmeta::ResourceDescriptor; + /// + /// let client = reqwest::Client::new(); + /// let url: reqwest::Url = "https://junimo.party/.well-known/host-meta".parse() + /// .expect("URL to be valid"); + /// + /// let rd = ResourceDescriptor::discover(&client, url) + /// .await + /// .expect("resource descriptor to be discovered correctly"); + /// # }) + /// ``` + /// + pub async fn discover(client: &reqwest::Client, mut url: reqwest::Url) -> Result { + use ResourceDescriptorDiscoveryError::*; + + log::debug!("Discovering resource descriptor document at: {url}"); + + log::trace!("Unsetting URL query..."); + url.set_query(None); + + log::trace!("Unsetting URL fragment..."); + url.set_fragment(None); + + log::trace!("Setting URL scheme to HTTPS..."); + url.set_scheme("https") + .map_err(UrlManipulation)?; + + log::trace!("Cloning URL for HTTPS XRD retrieval..."); + let https_xrd_url = url.clone(); + + log::trace!("Attempting HTTPS XRD retrieval..."); + let https_xrd = match Self::get_xrd(client, https_xrd_url).await { + Ok(data) => { + log::trace!("HTTPS XRD retrieval was successful, 
returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTPS XRD retrieval failed."); + err + } + }; + + log::trace!("Cloning URL for HTTPS JRD retrieval..."); + let https_jrd_url = url.clone(); + + log::trace!("Attempting HTTPS JRD retrieval..."); + let https_jrd = match Self::get_jrd(client, https_jrd_url).await { + Ok(data) => { + log::trace!("HTTPS JRD retrieval was successful, returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTPS JRD retrieval failed."); + err + } + }; + + log::trace!("Cloning URL for HTTPS JRD .json retrieval..."); + let mut https_jrdj_url = url.clone(); + + log::trace!("Altering URL path for HTTPS JRD .json retrieval..."); + https_jrdj_url.set_path( + &format!("{}.json", https_jrdj_url.path()) + ); + + log::trace!("Attempting HTTPS JRD .json retrieval..."); + let https_jrdj = match Self::get_jrd(client, https_jrdj_url).await { + Ok(data) => { + log::trace!("HTTPS JRD .json retrieval was successful, returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTPS JRD .json retrieval failed."); + err + } + }; + + log::trace!("Setting URL scheme to HTTP..."); + url.set_scheme("http") + .map_err(UrlManipulation)?; + + log::trace!("Cloning URL for HTTP XRD retrieval..."); + let http_xrd_url = url.clone(); + + log::trace!("Attempting HTTP XRD retrieval..."); + let http_xrd = match Self::get_xrd(client, http_xrd_url).await { + Ok(data) => { + log::trace!("HTTP XRD retrieval was successful, returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTP XRD retrieval failed."); + err + } + }; + + log::trace!("Cloning URL for HTTP JRD retrieval..."); + let http_jrd_url = url.clone(); + + log::trace!("Attempting HTTP JRD retrieval..."); + let http_jrd = match Self::get_jrd(client, http_jrd_url).await { + Ok(data) => { + log::trace!("HTTP JRD retrieval was successful, returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTP JRD retrieval failed."); + err + } + }; + + 
log::trace!("Cloning URL for HTTP JRD .json retrieval..."); + let mut http_jrdj_url = url.clone(); + + log::trace!("Altering URL path for HTTP JRD .json retrieval..."); + http_jrdj_url.set_path( + &format!("{}.json", http_jrdj_url.path()) + ); + + log::trace!("Attempting HTTP JRD .json retrieval..."); + let http_jrdj = match Self::get_jrd(client, http_jrdj_url).await { + Ok(data) => { + log::trace!("HTTP JRD .json retrieval was successful, returning..."); + return Ok(data) + } + Err(err) => { + log::warn!("HTTP JRD .json retrieval failed."); + err + } + }; + + Err( + ResourceDescriptorDiscoveryError::Fetch( + ResourceDescriptorDiscoveryFailures { + https_xrd, + https_jrd, + https_jrdj, + http_xrd, + http_jrd, + http_jrdj, + } + ) + ) + } + + /// Well-known path for host-meta documents. + /// + /// # Specification + /// + /// - + /// + pub const WELLKNOWN_HOSTMETA_PATH: &str = "/.well-known/host-meta"; + + /// Attempt to discover a host-meta document at the given base URL. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// use acrate_hostmeta::ResourceDescriptor; + /// + /// let client = reqwest::Client::new(); + /// let base: reqwest::Url = "https://junimo.party".parse() + /// .expect("URL to be valid"); + /// + /// let rd = ResourceDescriptor::discover_hostmeta(&client, base) + /// .await + /// .expect("host-meta to be discovered correctly"); + /// # }) + /// ``` + /// + pub async fn discover_hostmeta(client: &reqwest::Client, mut base: reqwest::Url) -> Result { + base.set_path(Self::WELLKNOWN_HOSTMETA_PATH); + + Self::discover(client, base) + .await + } +} + +/// Error occurred during [`ResourceDescriptor::discover`]. +#[derive(Debug)] +pub enum ResourceDescriptorDiscoveryError { + /// Manipulation of the provided base [`reqwest::Url`] failed. + /// + /// See [reqwest::Url::set_scheme] for possible causes. + UrlManipulation(()), + + /// All attempts of fetching a resource descriptor document failed. 
+ Fetch(ResourceDescriptorDiscoveryFailures), +} + +/// Request errors occurred during [`ResourceDescriptor::discover`]. +#[derive(Debug)] +pub struct ResourceDescriptorDiscoveryFailures { + /// HTTPS XRD retrieval. + pub https_xrd: GetXRDError, + + /// HTTPS JRD retrieval. + pub https_jrd: GetJRDError, + + /// HTTPS JRD with .json extension retrieval. + pub https_jrdj: GetJRDError, + + /// HTTP XRD retrieval. + pub http_xrd: GetXRDError, + + /// HTTP JRD retrieval. + pub http_jrd: GetJRDError, + + /// HTTP JRD with .json extension retrieval. + pub http_jrdj: GetJRDError, +} + +/// Error occurred during [`ResourceDescriptor::get_xrd`]. +#[derive(Debug)] +pub enum GetXRDError { + /// The HTTP request failed. + Request(reqwest::Error), + /// The `Content-Type` header of the response is missing. + ContentTypeMissing, + /// The `Content-Type` header of the response is not readable as text, or its MIME type is not acceptable for XRD. + ContentTypeInvalid, + /// The document failed to be read as text. + Decode(reqwest::Error), + /// The document failed to be parsed as XML by [`quick_xml`]. + Parse(quick_xml::DeError), +} + +/// Error occurred during [`ResourceDescriptor::get_jrd`]. +#[derive(Debug)] +pub enum GetJRDError { + /// The HTTP request failed. + Request(reqwest::Error), + /// The `Content-Type` header of the response is missing. + ContentTypeMissing, + /// The `Content-Type` header of the response is not readable as text, or its MIME type is not acceptable for JRD. + ContentTypeInvalid, + /// The document failed to be parsed as JSON by [`reqwest`]. + Parse(reqwest::Error), +} + +/// Extract the MIME type from the value of the `Content-Type` header. 
+fn extract_mime_from_content_type(value: &reqwest::header::HeaderValue) -> Option { + let value = value.to_str().ok()?; + // Parameters follow the media type after a `;`; the whitespace around it is optional (`a/b;charset=utf-8`). + let mime = value.split(';').next().unwrap_or(value).trim(); + // Media types are case-insensitive: normalize so that callers can compare against lowercase literals. + Some(mime.to_ascii_lowercase()) +} diff --git a/acrate-hostmeta/tests/hostmeta_tests.rs b/acrate-hostmeta/tests/hostmeta_tests.rs new file mode 100644 index 0000000..1bdd795 --- /dev/null +++ b/acrate-hostmeta/tests/hostmeta_tests.rs @@ -0,0 +1,62 @@ +const CARGO_PKG_NAME: &str = env!("CARGO_PKG_NAME"); +const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION"); +const CARGO_PKG_REPOSITORY: &str = env!("CARGO_PKG_REPOSITORY"); + + +fn init_log() { + let mut builder = pretty_env_logger::formatted_builder(); + builder.target(pretty_env_logger::env_logger::Target::Stdout); + builder.filter_level(log::LevelFilter::max()); + builder.is_test(true); + + if builder.try_init().is_ok() { + log::debug!("Initialized logging!"); + } +} + +fn make_client() -> reqwest::Client { + let user_agent = format!("{CARGO_PKG_NAME}/{CARGO_PKG_VERSION} ({CARGO_PKG_REPOSITORY})"); + + reqwest::Client::builder() + .user_agent(user_agent) + .build() + .expect("reqwest client to build") +} + +macro_rules! 
test_discover_hostmeta { + ($id:ident, $url:literal) => { + test_discover_hostmeta!($id, $url,); + }; + ($id:ident, $url:literal, $($tag:meta),*) => { + mod $id { + use acrate_hostmeta::*; + use super::*; + + #[tokio::test] + $(#[$tag])* + async fn test() { + init_log(); + let client = make_client(); + + let base: reqwest::Url = $url.parse() + .expect("a valid URL"); + + let doc = ResourceDescriptor::discover_hostmeta(&client, base) + .await + .expect("host-meta discovery to succeed"); + + log::info!("Parsed host-meta document: {doc:#?}"); + } + } + }; +} + +test_discover_hostmeta!(akkoma, "https://junimo.party"); +test_discover_hostmeta!(mastodon, "https://mastodon.social"); +test_discover_hostmeta!(misskey, "https://misskey.io"); +test_discover_hostmeta!(iceshrimpnet, "https://ice.frieren.quest"); +test_discover_hostmeta!(gotosocial, "https://alpha.polymaths.social"); +test_discover_hostmeta!(bridgyfed, "https://fed.brid.gy"); +test_discover_hostmeta!(threads, "https://threads.net", ignore = "does not support host-meta"); +test_discover_hostmeta!(snac, "https://ngoa.giao.loan", ignore = "does not support host-meta"); +test_discover_hostmeta!(hollo, "https://hollo.social", ignore = "does not support host-meta"); diff --git a/acrate-nodeinfo/Cargo.toml b/acrate-nodeinfo/Cargo.toml index 6832979..d8ca425 100644 --- a/acrate-nodeinfo/Cargo.toml +++ b/acrate-nodeinfo/Cargo.toml @@ -4,3 +4,16 @@ version = "0.1.0" edition = "2021" [dependencies] +acrate-hostmeta = { version = "0.1.0", path = "../acrate-hostmeta" } +log = "0.4.22" +reqwest = { version = "0.12.9", features = ["json", "stream"] } +serde = { version = "1.0.214", features = ["derive"] } +serde_json = "1.0.132" + +[dev-dependencies] +pretty_env_logger = "0.5.0" +tokio = { version = "1.41.1", features = ["macros", "rt-multi-thread"] } +tokio-test = "0.4.4" + +[lints.clippy] +tabs-in-doc-comments = "allow" diff --git a/acrate-nodeinfo/src/lib.rs b/acrate-nodeinfo/src/lib.rs index b93cf3f..0123a3d 100644 --- 
a/acrate-nodeinfo/src/lib.rs +++ b/acrate-nodeinfo/src/lib.rs @@ -1,14 +1,469 @@ -pub fn add(left: u64, right: u64) -> u64 { - left + right +//! Serde-based NodeInfo fetcher and loose parser. +//! +//! > NodeInfo is an effort to create a standardized way of exposing metadata about a server running one of the distributed social networks. +//! +//! # Specification +//! +//! - + +use serde::Deserialize; + +/// A variant of a NodeInfo document. +/// +/// # Specification +/// +/// - +#[derive(Debug, Clone)] +pub enum NodeInfo { + V1(NodeInfo1), + V2(NodeInfo2), } -#[cfg(test)] -mod tests { - use super::*; +/// A NodeInfo document at version 1.X. +/// +/// # Specification +/// +/// - +/// - +/// +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct NodeInfo1 { + /// The schema version. + pub version: String, - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } + /// Metadata about server software in use. + pub software: NodeInfo1Software, + + /// The protocols supported on this server. + pub protocols: NodeInfo1Protocols, + + /// The third party sites this server can connect to via their application API. + pub services: NodeInfo1Services, + + /// Whether this server allows open self-registration. + pub open_registrations: bool, + + /// Usage statistics for this server. + pub usage: NodeInfo1Usage, + + /// Free form key value pairs for software specific values. + /// + /// Clients should not rely on any specific key present. + pub metadata: serde_json::Value, +} + +/// A NodeInfo document at version 2.X. +/// +/// # Specification +/// +/// - +/// - +/// - +/// +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct NodeInfo2 { + /// The schema version. + pub version: String, + + /// Metadata specific to the instance. An instance is the concrete installation of a software running on a server. + pub instance: Option, + + /// Metadata about server software in use. 
+ pub software: NodeInfo1Software, + + /// The protocols supported on this server. + pub protocols: Vec, + + /// The third party sites this server can connect to via their application API. + pub services: Option, + + /// Whether this server allows open self-registration. + pub open_registrations: bool, + + /// Usage statistics for this server. + pub usage: Option, + + /// Free form key value pairs for software specific values. + /// + /// Clients should not rely on any specific key present. + pub metadata: serde_json::Value, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct NodeInfo1Software { + /// The canonical name of this server software. + pub name: String, + + /// The version of this server software. + pub version: String, + + pub repository: Option, + + pub homepage: Option, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct NodeInfo1Protocols { + /// The protocols this server can receive traffic for. + pub inbound: Vec, + + /// The protocols this server can generate traffic for. + pub outbound: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct NodeInfo1Services { + /// The third party sites this server can retrieve messages from for combined display with regular traffic. + pub inbound: Vec, + + /// The third party sites this server can publish messages to on the behalf of a user. + pub outbound: Vec, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct NodeInfo1Usage { + /// Statistics about the users of this server. + pub users: Option, + + /// The amount of posts that were made by users that are registered on this server. + pub local_posts: Option, + + /// The amount of comments that were made by users that are registered on this server. 
+ pub local_comments: Option, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct NodeInfo1UsageUsers { + /// The total amount of on this server registered users. + pub total: Option, + + /// The amount of users that signed in at least once in the last 180 days. + pub active_halfyear: Option, + + /// The amount of users that signed in at least once in the last 30 days. + pub active_month: Option, + + /// The amount of users that signed in at least once in the last 7 days. + pub active_week: Option, +} + +#[derive(Debug, Clone, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct NodeInfo2Instance { + /// If supported by the software, the administrator-configured name of this instance. + pub name: String, + + /// If supported by the software, the administrator-configured long form description of this instance. + pub description: String, +} + +impl NodeInfo { + /// Well-known path for NodeInfo documents. + /// + /// # Specification + /// + /// - + pub const WELLKNOWN_NODEINFO_PATH: &str = "/.well-known/nodeinfo"; + + /// Discover and get the latest NodeInfo version available given a certain base URL. 
+ /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// use acrate_nodeinfo::NodeInfo; + /// + /// let client = reqwest::Client::new(); + /// let url: reqwest::Url = "https://mastodon.online".parse() + /// .expect("URL to be valid"); + /// + /// let ni = NodeInfo::get_latest_wellknown(&client, url) + /// .await + /// .expect("NodeInfo to be processed correctly"); + /// + /// let version = match ni { + /// NodeInfo::V1(ni) => ni.version, + /// NodeInfo::V2(ni) => ni.version, + /// }; + /// + /// assert_eq!(version, "2.0"); + /// # }) + pub async fn get_latest_wellknown(client: &reqwest::Client, mut base: reqwest::Url) -> Result { + use NodeInfoGetWellknownError::*; + + log::debug!("Getting well-known NodeInfo document at base: {base}"); + + log::trace!("Setting URL path to the well-known NodeInfo value..."); + base.set_path(Self::WELLKNOWN_NODEINFO_PATH); + + log::trace!("Discovering NodeInfo document locations..."); + let discovery = acrate_hostmeta::ResourceDescriptor::discover(client, base) + .await + .map_err(Discovery)?; + + log::trace!("Getting a list of NodeInfo document links..."); + let mut links = discovery.links.unwrap_or_default(); + links.sort_unstable_by(|a, b| a.rel.cmp(&b.rel)); // Ascending by `rel`, compared by reference (no `String` clone per key). 
+ links.reverse(); + + for link in links.into_iter() { + + log::trace!("Checking discovered link href..."); + let url = match link.href { + None => { + log::warn!("Discovered link does not have an href, skipping..."); + continue + }, + Some(href) => { + log::trace!("Discovered link has an href, processing..."); + href + }, + }; + + log::trace!("Parsing discovered link href..."); + let url: reqwest::Url = match url.parse() { + Err(e) => { + log::warn!("Discovered link has an invalid URL as href, skipping: {e:#?}"); + continue + }, + Ok(url) => { + log::trace!("Discovered link has a valid URL, processing..."); + url + }, + }; + + let rel = link.rel; + + let nodeinfo = match rel.as_str() { + "http://nodeinfo.diaspora.software/ns/schema/1.0" => match NodeInfo1::get(client, url).await { + Err(e) => { + log::warn!("Failed to get NodeInfo v1.0 document, skipping: {e:#?}"); + continue; + }, + Ok(nodeinfo) => { + log::trace!("Successfully processed NodeInfo v1.0 document!"); + Self::V1(nodeinfo) + } + } + "http://nodeinfo.diaspora.software/ns/schema/1.1" => match NodeInfo1::get(client, url).await { + Err(e) => { + log::warn!("Failed to get NodeInfo v1.1 document, skipping: {e:#?}"); + continue; + }, + Ok(nodeinfo) => { + log::trace!("Successfully processed NodeInfo v1.1 document!"); + Self::V1(nodeinfo) + } + } + "http://nodeinfo.diaspora.software/ns/schema/2.0" => match NodeInfo2::get(client, url).await { + Err(e) => { + log::warn!("Failed to get NodeInfo v2.0 document, skipping: {e:#?}"); + continue; + }, + Ok(nodeinfo) => { + log::trace!("Successfully processed NodeInfo v2.0 document!"); + Self::V2(nodeinfo) + } + } + "http://nodeinfo.diaspora.software/ns/schema/2.1" => match NodeInfo2::get(client, url).await { + Err(e) => { + log::warn!("Failed to get NodeInfo v2.1 document, skipping: {e:#?}"); + continue; + }, + Ok(nodeinfo) => { + log::trace!("Successfully processed NodeInfo v2.1 document!"); + Self::V2(nodeinfo) + } + } + 
"http://nodeinfo.diaspora.software/ns/schema/2.2" => match NodeInfo2::get(client, url).await { + Err(e) => { + log::warn!("Failed to get NodeInfo v2.2 document, skipping: {e:#?}"); + continue; + }, + Ok(nodeinfo) => { + log::trace!("Successfully processed NodeInfo v2.2 document!"); + Self::V2(nodeinfo) + } + } + _ => { + log::warn!("Discovered link has unknown rel `{rel}`, skipping."); + continue; + }, + }; + + log::trace!("Successfully retrieved latest NodeInfo: {nodeinfo:#?}"); + return Ok(nodeinfo); + } + + log::warn!("Ran out of possible NodeInfo sources, returning an Unsupported error."); + Err(Unsupported) + } +} + +/// An error occurred during [`NodeInfo::get_latest_wellknown`]. +#[derive(Debug)] +pub enum NodeInfoGetWellknownError { + /// The discovery of possible locations for NodeInfo documents failed. + Discovery(acrate_hostmeta::ResourceDescriptorDiscoveryError), + /// No compatible NodeInfo documents were detected at the given URL. + Unsupported, +} + +impl NodeInfo1 { + /// Get a NodeInfo v1.X document. 
+ pub async fn get(client: &reqwest::Client, url: reqwest::Url) -> Result { + use NodeInfoGetError::*; + + log::debug!("Getting NodeInfo v1.X document at: {url}"); + + log::trace!("Building request..."); + let request = { + log::trace!("Creating new request..."); + let mut request = reqwest::Request::new(reqwest::Method::GET, url); + + log::trace!("Setting request headers..."); + let headers = request.headers_mut(); + + log::trace!("Setting `Accept: application/json`..."); + let _ = headers.insert(reqwest::header::ACCEPT, "application/json".parse().unwrap()); + + request + }; + + log::trace!("Sending request..."); + let response = client.execute(request) + .await + .map_err(Request)?; + + log::trace!("Checking `Content-Type` of the response..."); + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .ok_or(ContentTypeMissing)?; + + log::trace!("Extracting MIME type from the `Content-Type` header..."); + let mime_type = extract_mime_from_content_type(content_type) + .ok_or(ContentTypeInvalid)?; + + log::trace!("Ensuring MIME type is acceptable for NodeInfo documents..."); + if mime_type != "application/json" { + log::error!("MIME type `{mime_type}` is not acceptable for NodeInfo documents."); + return Err(ContentTypeInvalid) + } + + log::trace!("Attempting to parse response as JSON..."); + let data = response.json::() + .await + .map_err(Parse)?; + + log::trace!("Making sure version is compatible with 1.X..."); + if !data.version.starts_with("1.") { + return Err(Version); + } + + Ok(data) + } +} + +impl NodeInfo2 { + /// Get a NodeInfo v2.X document. 
+ /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// use acrate_nodeinfo::NodeInfo2; + /// + /// let client = reqwest::Client::new(); + /// let url: reqwest::Url = "https://junimo.party/nodeinfo/2.1.json".parse() + /// .expect("URL to be valid"); + /// + /// let rd = NodeInfo2::get(&client, url) + /// .await + /// .expect("NodeInfo to be obtained correctly"); + /// # }) + /// ``` + pub async fn get(client: &reqwest::Client, url: reqwest::Url) -> Result { + use NodeInfoGetError::*; + + log::debug!("Getting NodeInfo v2.X document at: {url}"); + + log::trace!("Building request..."); + let request = { + log::trace!("Creating new request..."); + let mut request = reqwest::Request::new(reqwest::Method::GET, url); + + log::trace!("Setting request headers..."); + let headers = request.headers_mut(); + + log::trace!("Setting `Accept: application/json`..."); + let _ = headers.insert(reqwest::header::ACCEPT, "application/json".parse().unwrap()); + + request + }; + + log::trace!("Sending request..."); + let response = client.execute(request) + .await + .map_err(Request)?; + + log::trace!("Checking `Content-Type` of the response..."); + let content_type = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .ok_or(ContentTypeMissing)?; + + log::trace!("Extracting MIME type from the `Content-Type` header..."); + let mime_type = extract_mime_from_content_type(content_type) + .ok_or(ContentTypeInvalid)?; + + log::trace!("Ensuring MIME type is acceptable for NodeInfo documents..."); + if mime_type != "application/json" { + log::error!("MIME type `{mime_type}` is not acceptable for NodeInfo documents."); + return Err(ContentTypeInvalid) + } + + log::trace!("Attempting to parse response as JSON..."); + let data = response.json::() + .await + .map_err(Parse)?; + + log::trace!("Making sure version is compatible with 2.X..."); + if !data.version.starts_with("2.") { + return Err(Version) + } + + Ok(data) + } +} + +/// An error encountered during 
[`NodeInfo1::get`] or [`NodeInfo2::get`].
+#[derive(Debug)]
+pub enum NodeInfoGetError {
+	/// The HTTP request failed.
+	Request(reqwest::Error),
+	/// The `Content-Type` header of the response is missing.
+	ContentTypeMissing,
+	/// The `Content-Type` header of the response is invalid.
+	ContentTypeInvalid,
+	/// The document failed to be parsed as JSON by [`reqwest`].
+	Parse(reqwest::Error),
+	/// The returned version does not match the version of the created struct.
+	Version,
+}
+
+/// Extract the MIME type from the value of the `Content-Type` header.
+///
+/// Everything from the first `;` onwards (parameters such as `charset=utf-8`) is
+/// discarded, whether or not whitespace surrounds the separator, and the result is
+/// lowercased because media type names compare case-insensitively.
+///
+/// Returns [`None`] if the header value cannot be read as a string.
+fn extract_mime_from_content_type(value: &reqwest::header::HeaderValue) -> Option<String> {
+	let value = value.to_str().ok()?;
+	// `split` always yields at least one item, so the fallback is never actually taken.
+	let mime = value.split(';').next().unwrap_or(value).trim();
+	Some(mime.to_ascii_lowercase())
 }
diff --git a/acrate-nodeinfo/tests/nodeinfo_tests.rs b/acrate-nodeinfo/tests/nodeinfo_tests.rs
new file mode 100644
index 0000000..e9e2fba
--- /dev/null
+++ b/acrate-nodeinfo/tests/nodeinfo_tests.rs
@@ -0,0 +1,70 @@
+const CARGO_PKG_NAME: &str = env!("CARGO_PKG_NAME");
+const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
+const CARGO_PKG_REPOSITORY: &str = env!("CARGO_PKG_REPOSITORY");
+
+/// Route logging at the maximum level to stdout, ignoring the error if a logger is already set.
+fn init_log() {
+	let mut builder = pretty_env_logger::formatted_builder();
+	builder.target(pretty_env_logger::env_logger::Target::Stdout);
+	builder.filter_level(log::LevelFilter::max());
+	builder.is_test(true);
+
+	if builder.try_init().is_ok() {
+		log::debug!("Initialized logging!");
+	}
+}
+
+fn make_client() -> reqwest::Client {
+	let user_agent = format!("{CARGO_PKG_NAME}/{CARGO_PKG_VERSION} ({CARGO_PKG_REPOSITORY})");
+	// Send requests with a `User-Agent` built from this package's Cargo metadata.
+	reqwest::Client::builder()
+		.user_agent(user_agent)
+		.build()
+		.expect("reqwest client to build")
+}
+
+/// Declare a module holding one async test that checks the NodeInfo version served at `$url`.
+macro_rules!
test {
+	($name:ident, $url:literal, $version:literal) => {
+		test!($name, $url, $version,); // trailing comma selects the arm below with zero attributes
+	};
+	($name:ident, $url:literal, $version:literal, $($attr:meta),*) => {
+		mod $name {
+			use acrate_nodeinfo::*;
+			use super::*;
+
+			#[tokio::test]
+			$(#[$attr])*
+			async fn test_version() {
+				init_log();
+				let http = make_client();
+				// The literal is the base URL handed to well-known discovery.
+				let origin = $url.parse::<reqwest::Url>()
+					.expect("a valid URL");
+				// Discover the document through the well-known endpoint.
+				let doc = NodeInfo::get_latest_wellknown(&http, origin)
+					.await
+					.expect("NodeInfo discovery to succeed");
+
+				log::info!("Parsed NodeInfo document: {doc:#?}");
+				// Both variants carry a `version` field; read it from whichever was returned.
+				let reported = match doc {
+					NodeInfo::V1(v1) => v1.version,
+					NodeInfo::V2(v2) => v2.version,
+				};
+
+				assert_eq!(reported, $version);
+			}
+		}
+	};
+}
+
+test!(akkoma, "https://junimo.party", "2.1");
+test!(mastodon, "https://mastodon.social", "2.0");
+test!(misskey, "https://misskey.io", "2.1");
+test!(iceshrimpnet, "https://ice.frieren.quest", "2.1");
+test!(gotosocial, "https://alpha.polymaths.social", "2.0");
+test!(bridgyfed, "https://fed.brid.gy", "2.1");
+test!(threads, "https://threads.net", "", ignore = "does not support NodeInfo");
+test!(snac, "https://ngoa.giao.loan", "2.0");
+test!(hollo, "https://hollo.social", "2.1");