tirocinio-canali-steffo-acrate/acrate_nodeinfo/src/lib.rs

500 lines
15 KiB
Rust
Raw Normal View History

//! Rust typing and utilities for the NodeInfo format.
//!
//! > NodeInfo is an effort to create a standardized way of exposing metadata about a server running one of the distributed social networks.
//!
//! # Specification
//!
//! - <https://github.com/jhass/nodeinfo/blob/main/PROTOCOL.md>
//! - <https://codeberg.org/fediverse/fep/src/branch/main/fep/f1d5/fep-f1d5.md>
//!
use mediatype::MediaTypeBuf;
use serde::Deserialize;
2024-11-13 06:31:39 +00:00
use thiserror::Error;
/// A variant of a NodeInfo document.
///
2024-11-11 07:43:22 +00:00
/// # Specification
///
2024-11-11 07:43:22 +00:00
/// - <https://github.com/jhass/nodeinfo/blob/main/PROTOCOL.md>
2024-11-19 02:31:07 +00:00
///
#[derive(Debug, Clone)]
pub enum NodeInfo {
V1(NodeInfo1),
V2(NodeInfo2),
}
/// A NodeInfo document at version 1.X.
///
2024-11-11 07:43:22 +00:00
/// # Specification
///
2024-11-11 07:43:22 +00:00
/// - <https://github.com/jhass/nodeinfo/blob/main/schemas/1.0/schema.json>
/// - <https://github.com/jhass/nodeinfo/blob/main/schemas/1.1/schema.json>
///
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1 {
/// The schema version.
pub version: String,
/// Metadata about server software in use.
pub software: NodeInfo1Software,
/// The protocols supported on this server.
pub protocols: Option<NodeInfo1Protocols>,
/// The third party sites this server can connect to via their application API.
pub services: Option<NodeInfo1Services>,
/// Whether this server allows open self-registration.
pub open_registrations: Option<bool>,
/// Usage statistics for this server.
pub usage: Option<NodeInfo1Usage>,
/// Free form key value pairs for software specific values.
///
/// Clients should not rely on any specific key present.
pub metadata: serde_json::Value,
}
/// A NodeInfo document at version 2.X.
///
2024-11-11 07:43:22 +00:00
/// # Specification
///
2024-11-11 07:43:22 +00:00
/// - <https://github.com/jhass/nodeinfo/blob/main/schemas/2.0/schema.json>
/// - <https://github.com/jhass/nodeinfo/blob/main/schemas/2.1/schema.json>
/// - <https://github.com/jhass/nodeinfo/blob/main/schemas/2.2/schema.json>
///
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo2 {
/// The schema version.
pub version: String,
/// Metadata specific to the instance. An instance is a the concrete installation of a software running on a server.
pub instance: Option<NodeInfo2Instance>,
/// Metadata about server software in use.
pub software: Option<NodeInfo1Software>,
/// The protocols supported on this server.
pub protocols: Option<Vec<String>>,
/// The third party sites this server can connect to via their application API.
pub services: Option<NodeInfo1Services>,
/// Whether this server allows open self-registration.
pub open_registrations: Option<bool>,
/// Usage statistics for this server.
pub usage: Option<NodeInfo1Usage>,
/// Free form key value pairs for software specific values.
///
/// Clients should not rely on any specific key present.
pub metadata: serde_json::Value,
}
/// Metadata about server software in use.
///
/// Despite the `NodeInfo1` prefix, this type is used by both [`NodeInfo1`] and [`NodeInfo2`].
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1Software {
/// The canonical name of this server software.
pub name: String,
/// The version of this server software.
pub version: String,
/// The source code repository of this server software, if the document provides one.
///
/// NOTE(review): presumably a URL, kept as an unparsed string — confirm against the schema.
pub repository: Option<String>,
/// The homepage of this server software, if the document provides one.
///
/// NOTE(review): presumably a URL, kept as an unparsed string — confirm against the schema.
pub homepage: Option<String>,
}
/// The protocols supported on a server, split by traffic direction.
///
/// Used by [`NodeInfo1`] only; [`NodeInfo2`] carries a flat list of protocol names instead.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1Protocols {
/// The protocols this server can receive traffic for.
pub inbound: Vec<String>,
/// The protocols this server can generate traffic for.
pub outbound: Vec<String>,
}
/// The third party sites a server can connect to via their application API.
///
/// Despite the `NodeInfo1` prefix, this type is used by both [`NodeInfo1`] and [`NodeInfo2`].
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1Services {
/// The third party sites this server can retrieve messages from for combined display with regular traffic.
pub inbound: Vec<String>,
/// The third party sites this server can publish messages to on the behalf of a user.
pub outbound: Vec<String>,
}
/// Usage statistics for a server.
///
/// Despite the `NodeInfo1` prefix, this type is used by both [`NodeInfo1`] and [`NodeInfo2`].
/// Every statistic is optional, so a document omitting any of them still deserializes.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1Usage {
/// Statistics about the users of this server.
pub users: Option<NodeInfo1UsageUsers>,
/// The amount of posts that were made by users that are registered on this server.
pub local_posts: Option<i32>,
/// The amount of comments that were made by users that are registered on this server.
pub local_comments: Option<i32>,
}
/// Statistics about the users of a server.
///
/// Every counter is optional, so a document omitting any of them still deserializes.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1UsageUsers {
/// The total amount of users registered on this server.
pub total: Option<i32>,
/// The amount of users that signed in at least once in the last 180 days.
pub active_halfyear: Option<i32>,
/// The amount of users that signed in at least once in the last 30 days.
pub active_month: Option<i32>,
/// The amount of users that signed in at least once in the last 7 days.
pub active_week: Option<i32>,
}
/// Metadata specific to an instance, that is, the concrete installation of a software running on a server.
///
/// Used by [`NodeInfo2`] only.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo2Instance {
/// If supported by the software, the administrator-configured name of this instance.
///
/// Unlike `description`, this field is mandatory whenever the `instance` object is present.
pub name: String,
/// If supported by the software, the administrator-configured long form description of this instance.
pub description: Option<String>,
}
impl NodeInfo {
2024-11-11 07:31:09 +00:00
/// Well-known path for NodeInfo documents.
///
2024-11-11 07:43:22 +00:00
/// # Specification
2024-11-11 07:31:09 +00:00
///
2024-11-11 07:43:22 +00:00
/// - <https://github.com/jhass/nodeinfo/blob/main/PROTOCOL.md#discovery>
2024-11-14 00:37:48 +00:00
pub const WELLKNOWN_NODEINFO_PATH: &'static str = "/.well-known/nodeinfo";
2024-11-11 07:31:09 +00:00
/// Discover and get the latest NodeInfo version available given a certain base URL.
///
2024-11-11 07:43:22 +00:00
/// # Examples
///
/// ```
/// # tokio_test::block_on(async {
/// use acrate_nodeinfo::NodeInfo;
///
/// let client = reqwest::Client::new();
/// let url: reqwest::Url = "https://mastodon.online".parse()
/// .expect("URL to be valid");
///
/// let ni = NodeInfo::get_latest_wellknown(&client, url)
/// .await
/// .expect("NodeInfo to be processed correctly");
///
/// let version = match ni {
/// NodeInfo::V1(ni) => ni.version,
/// NodeInfo::V2(ni) => ni.version,
/// };
///
/// assert_eq!(version, "2.0");
/// # })
2024-11-11 07:31:09 +00:00
pub async fn get_latest_wellknown(client: &reqwest::Client, mut base: reqwest::Url) -> Result<Self, NodeInfoGetWellknownError> {
use NodeInfoGetWellknownError::*;
2024-11-11 07:31:09 +00:00
log::debug!("Getting well-known NodeInfo document at base: {base}");
log::trace!("Setting URL path to the well-known NodeInfo value...");
base.set_path(Self::WELLKNOWN_NODEINFO_PATH);
log::trace!("Discovering NodeInfo document locations...");
2024-11-19 02:31:07 +00:00
let discovery = acrate_rd::any::ResourceDescriptor::get(client, base)
.await
2024-11-14 01:47:00 +00:00
.map_err(Get)?
.jrd();
log::trace!("Getting a list of NodeInfo document links...");
2024-11-14 01:47:00 +00:00
let mut links = discovery.links;
links.sort_unstable_by_key(|o| o.rel.clone()); // TODO: Performance can be improved.
links.reverse();
for link in links.into_iter() {
log::trace!("Checking discovered link href...");
let url = match link.href {
None => {
log::warn!("Discovered link does not have an href, skipping...");
continue
},
Some(href) => {
log::trace!("Discovered link has an href, processing...");
href
},
};
log::trace!("Parsing discovered link href...");
let url: reqwest::Url = match url.parse() {
Err(e) => {
log::warn!("Discovered link has an invalid URL as href, skipping: {e:#?}");
continue
},
Ok(url) => {
log::trace!("Discovered link has a valid URL, processing...");
url
},
};
let rel = link.rel;
let nodeinfo = match rel.as_str() {
"http://nodeinfo.diaspora.software/ns/schema/1.0" => match NodeInfo1::get(client, url).await {
Err(e) => {
log::warn!("Failed to get NodeInfo v1.0 document, skipping: {e:#?}");
continue;
},
Ok(nodeinfo) => {
log::trace!("Successfully processed NodeInfo v1.0 document!");
Self::V1(nodeinfo)
}
}
"http://nodeinfo.diaspora.software/ns/schema/1.1" => match NodeInfo1::get(client, url).await {
Err(e) => {
log::warn!("Failed to get NodeInfo v1.1 document, skipping: {e:#?}");
continue;
},
Ok(nodeinfo) => {
log::trace!("Successfully processed NodeInfo v1.1 document!");
Self::V1(nodeinfo)
}
}
"http://nodeinfo.diaspora.software/ns/schema/2.0" => match NodeInfo2::get(client, url).await {
Err(e) => {
log::warn!("Failed to get NodeInfo v2.0 document, skipping: {e:#?}");
continue;
},
Ok(nodeinfo) => {
log::trace!("Successfully processed NodeInfo v2.0 document!");
Self::V2(nodeinfo)
}
}
"http://nodeinfo.diaspora.software/ns/schema/2.1" => match NodeInfo2::get(client, url).await {
Err(e) => {
log::warn!("Failed to get NodeInfo v2.1 document, skipping: {e:#?}");
continue;
},
Ok(nodeinfo) => {
log::trace!("Successfully processed NodeInfo v2.1 document!");
Self::V2(nodeinfo)
}
}
"http://nodeinfo.diaspora.software/ns/schema/2.2" => match NodeInfo2::get(client, url).await {
Err(e) => {
log::warn!("Failed to get NodeInfo v2.2 document, skipping: {e:#?}");
continue;
},
Ok(nodeinfo) => {
log::trace!("Successfully processed NodeInfo v2.2 document!");
Self::V2(nodeinfo)
}
}
_ => {
log::warn!("Discovered link has unknown rel `{rel}`, skipping.");
continue;
},
};
2024-11-11 07:31:09 +00:00
log::trace!("Successfully retrieved latest NodeInfo: {nodeinfo:#?}");
return Ok(nodeinfo);
}
log::warn!("Ran out of possible NodeInfo sources, returning an Unsupported error.");
Err(Unsupported)
}
}
/// An error occurred during [`NodeInfo::get_latest_wellknown`].
2024-11-13 06:31:39 +00:00
#[derive(Debug, Error)]
pub enum NodeInfoGetWellknownError {
/// The discovery of possible locations for NodeInfo documents failed.
2024-11-13 06:31:39 +00:00
#[error("the discovery of possible locations for NodeInfo documents failed")]
2024-11-19 02:31:07 +00:00
Get(acrate_rd::any::GetError),
/// No compatible NodeInfo documents were detected at the given URL.
2024-11-13 06:31:39 +00:00
#[error("no compatible NodeInfo documents were detected at the given URL")]
Unsupported,
}
impl NodeInfo1 {
/// Get a NodeInfo v1.X document.
pub async fn get(client: &reqwest::Client, url: reqwest::Url) -> Result<Self, NodeInfoGetError> {
use NodeInfoGetError::*;
log::debug!("Getting NodeInfo v1.X document at: {url}");
log::trace!("Building request...");
let request = {
log::trace!("Creating new request...");
let mut request = reqwest::Request::new(reqwest::Method::GET, url);
log::trace!("Setting request headers...");
let headers = request.headers_mut();
log::trace!("Setting `Accept: application/json`...");
let _ = headers.insert(reqwest::header::ACCEPT, "application/json".parse().unwrap());
request
};
log::trace!("Sending request...");
let response = client.execute(request)
.await
.map_err(Request)?;
2024-11-19 02:31:07 +00:00
log::trace!("Checking `Content-Type` of the response...");
let content_type = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.ok_or(ContentTypeMissing)?;
2024-11-19 02:31:07 +00:00
2024-11-19 05:06:48 +00:00
log::trace!("Extracting media type from the `Content-Type` header: {content_type:?}");
let media_type: MediaTypeBuf = content_type
.to_str()
.map_err(ContentTypeUnprintable)?
.parse()
2024-11-19 02:31:07 +00:00
.map_err(ContentTypeInvalid)?;
2024-11-19 05:06:48 +00:00
log::trace!("Checking if media type is supported: {media_type:?}");
let mime_is_json = media_type.essence().eq(&"application/json".parse::<MediaTypeBuf>().unwrap());
log::trace!("Is media type `application/json`? {mime_is_json:?}");
2024-11-19 02:31:07 +00:00
if !mime_is_json {
2024-11-19 05:06:48 +00:00
log::error!("Media type `{media_type}` is not acceptable for NodeInfo parsing.");
2024-11-19 02:31:07 +00:00
return Err(ContentTypeUnsupported);
}
log::trace!("Attempting to parse response as JSON...");
let data = response.json::<Self>()
.await
.map_err(Parse)?;
log::trace!("Making sure version is compatible with 1.X...");
if !data.version.starts_with("1.") {
return Err(Version);
}
Ok(data)
}
}
impl NodeInfo2 {
/// Get a NodeInfo v2.X document.
///
2024-11-11 07:43:22 +00:00
/// # Examples
///
/// ```
/// # tokio_test::block_on(async {
/// use acrate_nodeinfo::NodeInfo2;
///
/// let client = reqwest::Client::new();
/// let url: reqwest::Url = "https://junimo.party/nodeinfo/2.1.json".parse()
/// .expect("URL to be valid");
///
/// let rd = NodeInfo2::get(&client, url)
/// .await
/// .expect("NodeInfo to be obtained correctly");
/// # })
/// ```
pub async fn get(client: &reqwest::Client, url: reqwest::Url) -> Result<Self, NodeInfoGetError> {
use NodeInfoGetError::*;
log::debug!("Getting NodeInfo v2.X document at: {url}");
log::trace!("Building request...");
let request = {
log::trace!("Creating new request...");
let mut request = reqwest::Request::new(reqwest::Method::GET, url);
log::trace!("Setting request headers...");
let headers = request.headers_mut();
log::trace!("Setting `Accept: application/json`...");
let _ = headers.insert(reqwest::header::ACCEPT, "application/json".parse().unwrap());
request
};
log::trace!("Sending request...");
let response = client.execute(request)
.await
.map_err(Request)?;
2024-11-19 02:31:07 +00:00
log::trace!("Checking `Content-Type` of the response...");
let content_type = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.ok_or(ContentTypeMissing)?;
2024-11-19 02:31:07 +00:00
2024-11-19 05:06:48 +00:00
log::trace!("Extracting media type from the `Content-Type` header: {content_type:?}");
let media_type: MediaTypeBuf = content_type
.to_str()
.map_err(ContentTypeUnprintable)?
.parse()
2024-11-19 02:31:07 +00:00
.map_err(ContentTypeInvalid)?;
2024-11-19 05:06:48 +00:00
log::trace!("Checking if media type is supported: {media_type:?}");
let mime_is_json = media_type.essence().eq(&"application/json".parse::<MediaTypeBuf>().unwrap());
log::trace!("Is media type `application/json`? {mime_is_json:?}");
2024-11-19 02:31:07 +00:00
if !mime_is_json {
2024-11-19 05:06:48 +00:00
log::error!("Media type `{media_type}` is not acceptable for NodeInfo parsing.");
2024-11-19 02:31:07 +00:00
return Err(ContentTypeUnsupported);
}
log::trace!("Attempting to parse response as JSON...");
let data = response.json::<Self>()
.await
.map_err(Parse)?;
log::trace!("Making sure version is compatible with 2.X...");
if !data.version.starts_with("2.") {
return Err(Version)
}
Ok(data)
}
}
/// An error encountered during [`NodeInfo1::get`] or [`NodeInfo2::get`].
2024-11-13 06:31:39 +00:00
#[derive(Debug, Error)]
pub enum NodeInfoGetError {
/// The HTTP request failed.
2024-11-13 06:31:39 +00:00
#[error("the HTTP request failed")]
Request(reqwest::Error),
2024-11-19 02:31:07 +00:00
/// The `Content-Type` header of the response is missing.
2024-11-13 06:31:39 +00:00
#[error("the Content-Type header of the response is missing")]
ContentTypeMissing,
2024-11-19 02:31:07 +00:00
/// The `Content-Type` header of the response can't be converted to a [`str`].
#[error("the Content-Type header of the response cannot be converted to a &str")]
ContentTypeUnprintable(reqwest::header::ToStrError),
/// The `Content-Type` header of the response is not a valid [`mime::Mime`] type.
#[error("the Content-Type header of the response is not a valid media type")]
2024-11-19 05:06:48 +00:00
ContentTypeInvalid(mediatype::MediaTypeError),
2024-11-19 02:31:07 +00:00
/// The `Content-Type` header of the response is not a supported [`mime::Mime`] type.
#[error("the Content-Type header of the response is not a supported media type")]
ContentTypeUnsupported,
2024-11-13 06:31:39 +00:00
/// The document failed to be parsed as JSON by [`reqwest`].
2024-11-13 06:31:39 +00:00
#[error("the document failed to be parsed as JSON")]
Parse(reqwest::Error),
2024-11-13 06:31:39 +00:00
/// The returned NodeInfo version would not match the version of the called method.
#[error("the returned NodeInfo version would not match the version of the called method")]
Version,
}