Compare commits

...

44 commits

Author SHA1 Message Date
be740ddbbd
Create hostmeta and nodeinfo crates (#1)
Reviewed-on: #1
2024-11-11 07:48:59 +00:00
c2da1723cf
core: Re-export all subcrates 2024-11-11 08:46:16 +01:00
77e76d9dfb
core: Blank out the file 2024-11-11 08:43:35 +01:00
a7d095fee0
Add some sort of docs 2024-11-11 08:43:22 +01:00
b46a995c09
i lied and i did it now 2024-11-11 08:31:09 +01:00
9fb2fd3b0a
Final touches, will reorganize when not asleep 2024-11-11 08:20:25 +01:00
3d8f34a84c
hostmeta: Integrate webfinger specification 2024-11-11 06:07:24 +01:00
13f6a4e4c7
Allow tabs-in-doc-comments
(No reason not to.)
2024-11-11 06:06:41 +01:00
1ba354a1a5
hostmeta: Fully qualify reqwest::header::HeaderValue 2024-11-11 04:25:06 +01:00
5ecb0bb615
nodeinfo: Give up on following the spec and implement things loosely 2024-11-11 04:11:53 +01:00
61d8bbea95
nodeinfo: Move spec submodule to src 2024-11-11 03:34:31 +01:00
83f61395fa
nodeinfo: Add serde_json dep 2024-11-11 03:28:13 +01:00
2be3d76f4a
nodeinfo: Add jsonschema dep 2024-11-11 03:24:22 +01:00
1d4f190630
nodeinfo: Add basics 2024-11-11 03:20:48 +01:00
7bc09aaed4
nodeinfo: Create crate 2024-11-11 03:11:39 +01:00
a936986198
nodeinfo: Rename to hostmeta 2024-11-11 03:06:34 +01:00
c12b0a7869
nodeinfo: Report error if MIME type check fails 2024-11-11 03:04:20 +01:00
3cfc86a8fd
nodeinfo: Setup test logging, sort of 2024-11-11 03:01:46 +01:00
dba9a93e2e
nodeinfo: Fix typo in test_nodeinfo 2024-11-11 02:33:24 +01:00
76783eabe4
nodeinfo: Allow MIME type of application/jrd+json 2024-11-11 02:18:57 +01:00
a74ee45f6e
nodeinfo: Add ignored test for hollo 2024-11-11 02:13:58 +01:00
0001c43f79
nodeinfo: Add ignored test for snac 2024-11-11 02:12:36 +01:00
2aeed9773f
nodeinfo: Improve XRD compatibility 2024-11-09 16:20:36 +01:00
6ca0b5368a
nodeinfo: Add more tests 2024-11-09 16:18:46 +01:00
ee2f386cf0
nodeinfo: Alias PascalCase tags to snake_case ones 2024-11-09 15:03:54 +01:00
e5f52a760a
nodeinfo: Don't Serialize, just Deserialize for now 2024-11-09 15:02:44 +01:00
d964f1befc
nodeinfo: Split off Content-Type parameters before checking the MIME type 2024-11-09 14:57:55 +01:00
de5fdf26f3
nodeinfo: Use header names from reqwest::header 2024-11-09 14:17:51 +01:00
707d6d3d54
nodeinfo: Add two basic (failing) tests 2024-11-09 13:33:08 +01:00
00befc432d
nodeinfo: Derive Debug on all structs 2024-11-09 13:27:26 +01:00
f9da277845
nodeinfo: Add rt-multi-thread and macros features to tokio 2024-11-09 13:25:47 +01:00
2f8e231b04
nodeinfo: Add tokio and pretty_env_logger deps 2024-11-09 13:23:31 +01:00
585eb1476e
nodeinfo: this is actually host-meta 2024-11-09 13:14:11 +01:00
a6672315ff
nodeinfo: Add overlapped-lists feature to quick-xml 2024-11-09 12:58:43 +01:00
370895e7be
nodeinfo: Add serialize feature to quick-xml 2024-11-09 12:50:27 +01:00
d57811cfb7
nodeinfo: Add quick-xml dep 2024-11-09 12:50:27 +01:00
c5e3fcfc99
Create crate 2024-11-09 11:01:26 +01:00
1a374d3d98
nodeinfo: Add derive feature to serde 2024-11-09 11:01:06 +01:00
e795f1ad67
Add .editorconfig 2024-11-09 10:27:59 +01:00
27a9b2fa08
nodeinfo: Add log dep 2024-11-09 10:25:05 +01:00
e0019c8772
nodeinfo: Add json feature to reqwest 2024-11-09 10:08:54 +01:00
bab93b3f55
nodeinfo: Add serde and serde_json deps 2024-11-09 09:59:27 +01:00
949a84922b
nodeinfo: Add reqwest dep 2024-11-09 09:58:12 +01:00
be7b1ae3f7
core: Create nodeinfo table 2024-11-09 09:52:32 +01:00
13 changed files with 1217 additions and 24 deletions

21
.editorconfig Normal file
View file

@ -0,0 +1,21 @@
root = true
[*]
charset = utf-8
end_of_line = lf
indent_size = 4
indent_style = tab
insert_final_newline = true
tab_width = 4
[*.yml]
indent_size = 2
indent_style = space
[*.md]
indent_size = 3
indent_style = space
[*.rst]
indent_size = 3
indent_style = space

0
.gitmodules vendored Normal file
View file

View file

@ -1,3 +1,3 @@
[workspace]
resolver = "2"
members = ["acrate-core", "acrate-nodeinfo"]
members = ["acrate-core", "acrate-hostmeta", "acrate-nodeinfo"]

View file

@ -6,3 +6,8 @@ edition = "2021"
[dependencies]
diesel = "2.2.4"
diesel_migrations = "2.2.0"
acrate-hostmeta = { path = "../acrate-hostmeta" }
acrate-nodeinfo = { path = "../acrate-nodeinfo" }
[lints.clippy]
tabs-in-doc-comments = "allow"

View file

@ -0,0 +1 @@
DROP TABLE nodeinfo;

View file

@ -0,0 +1,9 @@
-- Table of NodeInfo documents, keyed by the URL in `nodeinfo_href`.
CREATE TABLE nodeinfo (
-- NOTE(review): presumably the NodeInfo schema identifier of the stored document; confirm against the code that fills this table.
nodeinfo_schema VARCHAR NOT NULL,
-- Primary key of the table (see the constraint below).
nodeinfo_href VARCHAR NOT NULL,
-- The document itself, stored as JSON.
nodeinfo_data JSON NOT NULL,
-- No default is declared, so every writer has to supply this value.
last_updated TIMESTAMP NOT NULL,
PRIMARY KEY(nodeinfo_href)
);

View file

@ -1,14 +1,4 @@
pub fn add(left: u64, right: u64) -> u64 {
left + right
}
//! Core crate of the `acrate` project.
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn it_works() {
let result = add(2, 2);
assert_eq!(result, 4);
}
}
pub use acrate_nodeinfo as nodeinfo;
pub use acrate_hostmeta as hostmeta;

View file

@ -0,0 +1,19 @@
[package]
name = "acrate-hostmeta"
version = "0.1.0"
edition = "2021"
[dependencies]
log = "0.4.22"
quick-xml = { version = "0.37.0", features = ["overlapped-lists", "serialize"] }
reqwest = { version = "0.12.9", features = ["json", "stream"] }
serde = { version = "1.0.214", features = ["derive"] }
serde_json = "1.0.132"
[dev-dependencies]
pretty_env_logger = "0.5.0"
tokio = { version = "1.41.1", features = ["macros", "rt-multi-thread"] }
tokio-test = "0.4.4"
[lints.clippy]
tabs-in-doc-comments = "allow"

548
acrate-hostmeta/src/lib.rs Normal file
View file

@ -0,0 +1,548 @@
//! Resource descriptor deserializer.
//!
//! # Specification
//!
//! - <https://datatracker.ietf.org/doc/html/rfc6415>
//! - <https://datatracker.ietf.org/doc/html/rfc7033>
use std::collections::HashMap;
use serde::Deserialize;
/// A resource descriptor object.
///
/// The same type is used for both encodings fetched by this crate: JSON documents
/// ([`Self::get_jrd`]) and XML documents ([`Self::get_xrd`]). The PascalCase `alias` attributes on
/// the fields make the capitalized names acceptable in addition to the lowercase field names.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc6415#section-3>
/// - <https://datatracker.ietf.org/doc/html/rfc7033#section-4.4>
///
#[derive(Debug, Clone, Deserialize)]
pub struct ResourceDescriptor {
/// The resource this document refers to.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc7033#section-4.4.1>
///
#[serde(alias = "Subject")]
pub subject: Option<String>,
/// Other names the resource described by this document can be referred to.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc7033#section-4.4.2>
///
#[serde(alias = "Alias")]
pub aliases: Option<Vec<String>>,
// NOTE(review): RFC 7033 section 4.4.3 appears to describe the JSON form of `properties` as a
// single object (URI -> string or null) rather than a list of elements; confirm that JRD documents
// carrying properties actually deserialize into this type.
/// Additional information about the resource described by this document.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc7033#section-4.4.3>
///
#[serde(alias = "Property")]
pub properties: Option<Vec<ResourceDescriptorProperty>>,
/// Links established between the [`Self::subject`] and other resources.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc6415#section-3.1.1>
/// - <https://datatracker.ietf.org/doc/html/rfc7033#section-4.4.4>
///
#[serde(alias = "Link")]
pub links: Option<Vec<ResourceDescriptorLink>>,
}
/// A link element, which puts the subject resource in relation with another.
///
/// Only [`Self::rel`] is mandatory for deserialization; every other field is optional. The
/// `@`-prefixed aliases accept the attribute-style names in addition to the plain field names.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc6415#section-3.1.1>
///
#[derive(Debug, Clone, Deserialize)]
pub struct ResourceDescriptorLink {
/// The kind of relation established by the subject with the attached resource.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc7033#section-4.4.4.1>
///
#[serde(alias = "@rel")]
pub rel: String,
/// The media type of the resource put in relation.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc7033#section-4.4.4.2>
///
#[serde(alias = "@type")]
pub r#type: Option<String>,
/// URI to the resource put in relation.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc7033#section-4.4.4.3>
///
#[serde(alias = "@href")]
pub href: Option<String>,
// NOTE(review): RFC 7033 section 4.4.4.4 appears to describe `titles` as a single JSON object keyed
// by language tag, not a list of objects; confirm that JRD links carrying titles deserialize into
// this type. Unlike the fields above, no alias is declared for the XML element name either.
/// Titles of the resource put in relation in various languages.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc7033#section-4.4.4.4>
///
pub titles: Option<Vec<HashMap<String, String>>>,
// NOTE(review): RFC 7033 section 4.4.4.5 appears to describe the JSON form of `properties` as a
// single object (URI -> string or null); confirm against real documents.
/// Additional information about the resource put in relation.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc7033#section-4.4.4.5>
///
pub properties: Option<Vec<ResourceDescriptorProperty>>,
/// Template to fill to get the URL to resource-specific information.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc6415#section-4.2>
///
#[serde(alias = "@template")]
pub template: Option<String>,
}
/// A property element, which describes a certain aspect of the subject resource.
///
/// Used both for document-level properties ([`ResourceDescriptor::properties`]) and for link-level
/// properties ([`ResourceDescriptorLink::properties`]).
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc7033#section-4.4.3>
///
#[derive(Debug, Clone, Deserialize)]
pub struct ResourceDescriptorProperty {
/// The property identifier, or type.
#[serde(alias = "@type")]
pub r#type: String,
// NOTE(review): in an XML document the value is presumably the text content of the element; confirm
// that the XML deserializer maps that content onto a field named `value`.
/// The property value.
pub value: Option<String>,
}
impl ResourceDescriptor {
/// Get a JRD (JSON [`ResourceDescriptor`]).
///
/// # Notes
///
/// This follows redirects until the redirect chain is 10 hops; see [`reqwest::redirect`] for more info.
///
/// # Examples
///
/// ```
/// # tokio_test::block_on(async {
/// use acrate_hostmeta::ResourceDescriptor;
///
/// let client = reqwest::Client::new();
/// let url: reqwest::Url = "https://junimo.party/.well-known/nodeinfo".parse()
/// .expect("URL to be valid");
///
/// let rd = ResourceDescriptor::get_jrd(&client, url)
/// .await
/// .expect("JRD to be processed correctly");
/// # })
/// ```
///
pub async fn get_jrd(client: &reqwest::Client, url: reqwest::Url) -> Result<Self, GetJRDError> {
use GetJRDError::*;
log::debug!("Getting JRD document at: {url}");
log::trace!("Building request...");
let request = {
log::trace!("Creating new request...");
let mut request = reqwest::Request::new(reqwest::Method::GET, url);
log::trace!("Setting request headers...");
let headers = request.headers_mut();
log::trace!("Setting `Accept: application/json`...");
let _ = headers.insert(reqwest::header::ACCEPT, "application/json".parse().unwrap());
request
};
log::trace!("Sending request...");
let response = client.execute(request)
.await
.map_err(Request)?;
log::trace!("Checking `Content-Type` of the response...");
let content_type = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.ok_or(ContentTypeMissing)?;
log::trace!("Extracting MIME type from the `Content-Type` header...");
let mime_type = extract_mime_from_content_type(content_type)
.ok_or(ContentTypeInvalid)?;
log::trace!("Ensuring MIME type is acceptable for JRD parsing...");
if !(mime_type == "application/json" || mime_type == "application/jrd+json") {
log::error!("MIME type `{mime_type}` is not acceptable for JRD parsing.");
return Err(ContentTypeInvalid)
}
log::trace!("Attempting to parse response as JSON...");
let data = response.json::<Self>()
.await
.map_err(Parse)?;
Ok(data)
}
/// Get a XRD (Extensible [`ResourceDescriptor`]).
///
/// # Notes
///
/// This follows redirects until the redirect chain is 10 hops; see [`reqwest::redirect`] for more info.
///
/// # Examples
///
/// ```
/// # tokio_test::block_on(async {
/// use acrate_hostmeta::ResourceDescriptor;
///
/// let client = reqwest::Client::new();
/// let url: reqwest::Url = "https://junimo.party/.well-known/host-meta".parse()
/// .expect("URL to be valid");
///
/// let rd = ResourceDescriptor::get_xrd(&client, url)
/// .await
/// .expect("XRD to be processed correctly");
/// # })
/// ```
///
pub async fn get_xrd(client: &reqwest::Client, url: reqwest::Url) -> Result<Self, GetXRDError> {
use GetXRDError::*;
log::debug!("Getting host-meta XRD document at: {url}");
log::trace!("Building request...");
let request = {
log::trace!("Creating new request...");
let mut request = reqwest::Request::new(reqwest::Method::GET, url);
log::trace!("Setting request headers...");
let headers = request.headers_mut();
log::trace!("Setting `Accept: application/xrd+xml`...");
let _ = headers.insert(reqwest::header::ACCEPT, "application/xrd+xml".parse().unwrap());
request
};
log::trace!("Sending request...");
let response = client.execute(request)
.await
.map_err(Request)?;
log::trace!("Checking `Content-Type` of the response...");
let content_type = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.ok_or(ContentTypeMissing)?;
log::trace!("Extracting MIME type from the `Content-Type` header...");
let mime_type = extract_mime_from_content_type(content_type)
.ok_or(ContentTypeInvalid)?;
log::trace!("Ensuring MIME type is acceptable for XRD parsing...");
if mime_type != "application/xrd+xml" {
log::error!("MIME type `{mime_type}` is not acceptable for XRD parsing.");
return Err(ContentTypeInvalid)
}
log::trace!("Attempting to parse response as text...");
let data = response.text()
.await
.map_err(Decode)?;
log::trace!("Parsing response as XML...");
let data = quick_xml::de::from_str::<Self>(&data)
.map_err(Parse)?;
Ok(data)
}
/// Attempt to discover a [`ResourceDescriptor`] at the given URL in various ways.
///
/// The query and fragment of `url` are dropped and its scheme is overridden before any request is made.
///
/// In order, this method attempts:
///
/// 1. HTTPS [XRD](Self::get_xrd)
/// 2. HTTPS [JRD](Self::get_jrd)
/// 3. HTTPS [JRD](Self::get_jrd) with .json path extension
/// 4. HTTP [XRD](Self::get_xrd)
/// 5. HTTP [JRD](Self::get_jrd)
/// 6. HTTP [JRD](Self::get_jrd) with .json path extension
///
/// The first attempt that succeeds is returned; later attempts are not made.
///
/// # Notes
///
/// This follows redirects until the redirect chain is 10 hops; see [`reqwest::redirect`] for more info.
///
/// # Errors
///
/// - [`ResourceDescriptorDiscoveryError::UrlManipulation`] if the scheme of `url` cannot be changed.
/// - [`ResourceDescriptorDiscoveryError::Fetch`] if all six attempts fail; the error of each attempt
///   is reported in the contained [`ResourceDescriptorDiscoveryFailures`].
///
/// # Examples
///
/// ```
/// # tokio_test::block_on(async {
/// use acrate_hostmeta::ResourceDescriptor;
///
/// let client = reqwest::Client::new();
/// let url: reqwest::Url = "https://junimo.party/.well-known/host-meta".parse()
/// 	.expect("URL to be valid");
///
/// let rd = ResourceDescriptor::discover(&client, url)
/// 	.await
/// 	.expect("resource descriptor to be discovered correctly");
/// # })
/// ```
///
pub async fn discover(client: &reqwest::Client, mut url: reqwest::Url) -> Result<Self, ResourceDescriptorDiscoveryError> {
    use ResourceDescriptorDiscoveryError::*;

    log::debug!("Discovering resource descriptor document at: {url}");

    log::trace!("Unsetting URL query...");
    url.set_query(None);

    log::trace!("Unsetting URL fragment...");
    url.set_fragment(None);

    log::trace!("Setting URL scheme to HTTPS...");
    url.set_scheme("https")
        .map_err(UrlManipulation)?;

    log::trace!("Cloning URL for HTTPS XRD retrieval...");
    let https_xrd_url = url.clone();

    log::trace!("Attempting HTTPS XRD retrieval...");
    let https_xrd = match Self::get_xrd(client, https_xrd_url).await {
        Ok(data) => {
            log::trace!("HTTPS XRD retrieval was successful, returning...");
            return Ok(data);
        }
        Err(err) => {
            log::warn!("HTTPS XRD retrieval failed.");
            err
        }
    };

    log::trace!("Cloning URL for HTTPS JRD retrieval...");
    let https_jrd_url = url.clone();

    log::trace!("Attempting HTTPS JRD retrieval...");
    let https_jrd = match Self::get_jrd(client, https_jrd_url).await {
        Ok(data) => {
            log::trace!("HTTPS JRD retrieval was successful, returning...");
            return Ok(data);
        }
        Err(err) => {
            log::warn!("HTTPS JRD retrieval failed.");
            err
        }
    };

    log::trace!("Cloning URL for HTTPS JRD .json retrieval...");
    let mut https_jrdj_url = url.clone();

    log::trace!("Altering URL path for HTTPS JRD .json retrieval...");
    https_jrdj_url.set_path(
        &format!("{}.json", https_jrdj_url.path())
    );

    log::trace!("Attempting HTTPS JRD .json retrieval...");
    let https_jrdj = match Self::get_jrd(client, https_jrdj_url).await {
        Ok(data) => {
            log::trace!("HTTPS JRD .json retrieval was successful, returning...");
            return Ok(data);
        }
        Err(err) => {
            log::warn!("HTTPS JRD .json retrieval failed.");
            err
        }
    };

    // Every HTTPS variant failed: fall back to plain HTTP and repeat the same three attempts.
    log::trace!("Setting URL scheme to HTTP...");
    url.set_scheme("http")
        .map_err(UrlManipulation)?;

    log::trace!("Cloning URL for HTTP XRD retrieval...");
    let http_xrd_url = url.clone();

    log::trace!("Attempting HTTP XRD retrieval...");
    let http_xrd = match Self::get_xrd(client, http_xrd_url).await {
        Ok(data) => {
            log::trace!("HTTP XRD retrieval was successful, returning...");
            return Ok(data);
        }
        Err(err) => {
            log::warn!("HTTP XRD retrieval failed.");
            err
        }
    };

    log::trace!("Cloning URL for HTTP JRD retrieval...");
    let http_jrd_url = url.clone();

    log::trace!("Attempting HTTP JRD retrieval...");
    let http_jrd = match Self::get_jrd(client, http_jrd_url).await {
        Ok(data) => {
            log::trace!("HTTP JRD retrieval was successful, returning...");
            return Ok(data);
        }
        Err(err) => {
            log::warn!("HTTP JRD retrieval failed.");
            err
        }
    };

    log::trace!("Cloning URL for HTTP JRD .json retrieval...");
    let mut http_jrdj_url = url.clone();

    // This message previously said "HTTPS" although this is the HTTP attempt.
    log::trace!("Altering URL path for HTTP JRD .json retrieval...");
    http_jrdj_url.set_path(
        &format!("{}.json", http_jrdj_url.path())
    );

    log::trace!("Attempting HTTP JRD .json retrieval...");
    let http_jrdj = match Self::get_jrd(client, http_jrdj_url).await {
        Ok(data) => {
            log::trace!("HTTP JRD .json retrieval was successful, returning...");
            return Ok(data);
        }
        Err(err) => {
            log::warn!("HTTP JRD .json retrieval failed.");
            err
        }
    };

    // Nothing worked: hand every individual failure back to the caller.
    Err(Fetch(ResourceDescriptorDiscoveryFailures {
        https_xrd,
        https_jrd,
        https_jrdj,
        http_xrd,
        http_jrd,
        http_jrdj,
    }))
}
/// Well-known path for host-meta documents.
///
/// [`Self::discover_hostmeta`] sets this as the path of the base URL it is given.
///
/// # Specification
///
/// - <https://datatracker.ietf.org/doc/html/rfc6415#section-2>
///
pub const WELLKNOWN_HOSTMETA_PATH: &str = "/.well-known/host-meta";
/// Attempt to discover a host-meta document at the given base URL.
///
/// The path of `base` is replaced with [`Self::WELLKNOWN_HOSTMETA_PATH`], then the resulting URL is
/// handed to [`Self::discover`]; see that method for the attempts made and the errors returned.
///
/// # Examples
///
/// ```
/// # tokio_test::block_on(async {
/// use acrate_hostmeta::ResourceDescriptor;
///
/// let client = reqwest::Client::new();
/// let base: reqwest::Url = "https://junimo.party".parse()
/// 	.expect("URL to be valid");
///
/// let rd = ResourceDescriptor::discover_hostmeta(&client, base)
/// 	.await
/// 	.expect("host-meta to be discovered correctly");
/// # })
/// ```
///
pub async fn discover_hostmeta(client: &reqwest::Client, mut base: reqwest::Url) -> Result<Self, ResourceDescriptorDiscoveryError> {
// Any path already present on `base` is overwritten, not appended to.
base.set_path(Self::WELLKNOWN_HOSTMETA_PATH);
Self::discover(client, base)
.await
}
}
/// Error occurred during [`ResourceDescriptor::discover`].
#[derive(Debug)]
pub enum ResourceDescriptorDiscoveryError {
/// Manipulation of the provided base [`reqwest::Url`] failed.
///
/// Returned when switching the URL scheme to `https` or `http` is rejected; the payload is the unit
/// error produced by that call and carries no further detail.
///
/// See [reqwest::Url::set_scheme] for possible causes.
UrlManipulation(()),
/// All attempts of fetching a resource descriptor document failed.
///
/// The individual error of every attempt is available in the payload.
Fetch(ResourceDescriptorDiscoveryFailures),
}
/// Request errors occurred during [`ResourceDescriptor::discover`].
///
/// One field per attempt, declared in the order in which the attempts are made.
#[derive(Debug)]
pub struct ResourceDescriptorDiscoveryFailures {
/// HTTPS XRD retrieval.
pub https_xrd: GetXRDError,
/// HTTPS JRD retrieval.
pub https_jrd: GetJRDError,
/// HTTPS JRD with .json extension retrieval.
pub https_jrdj: GetJRDError,
/// HTTP XRD retrieval.
pub http_xrd: GetXRDError,
/// HTTP JRD retrieval.
pub http_jrd: GetJRDError,
/// HTTP JRD with .json extension retrieval.
pub http_jrdj: GetJRDError,
}
/// Error occurred during [`ResourceDescriptor::get_xrd`].
#[derive(Debug)]
pub enum GetXRDError {
/// The HTTP request failed.
Request(reqwest::Error),
/// The `Content-Type` header of the response is missing.
ContentTypeMissing,
/// The `Content-Type` header of the response is invalid.
///
/// This covers both a header from which no MIME type could be extracted and a MIME type other than
/// `application/xrd+xml`.
ContentTypeInvalid,
/// The document failed to be read as text.
Decode(reqwest::Error),
/// The document failed to be parsed as XML by [`quick_xml`].
Parse(quick_xml::DeError),
}
/// Error occurred during [`ResourceDescriptor::get_jrd`].
#[derive(Debug)]
pub enum GetJRDError {
/// The HTTP request failed.
Request(reqwest::Error),
/// The `Content-Type` header of the response is missing.
ContentTypeMissing,
/// The `Content-Type` header of the response is invalid.
///
/// This covers both a header from which no MIME type could be extracted and a MIME type other than
/// `application/json` or `application/jrd+json`.
ContentTypeInvalid,
/// The document failed to be parsed as JSON by [`reqwest`].
///
/// Reading the body and deserializing it happen in a single step, so a failure of either is reported
/// here.
Parse(reqwest::Error),
}
/// Extract the MIME type from the value of the `Content-Type` header.
///
/// Everything from the first `;` onwards (the header parameters, such as `charset`) is discarded, the
/// remainder is trimmed of surrounding whitespace and lowercased. `Application/JSON;charset=utf-8`
/// therefore yields `application/json`, so callers can compare against lowercase literals.
///
/// Returns [`None`] if the header value cannot be read as a string, or if nothing is left once the
/// parameters are removed.
fn extract_mime_from_content_type(value: &reqwest::header::HeaderValue) -> Option<String> {
    let value = value.to_str().ok()?;
    // Parameters may follow the `;` with or without whitespace, so split on the bare separator and trim.
    let mime = value.split(';').next()?.trim();
    if mime.is_empty() {
        return None;
    }
    Some(mime.to_ascii_lowercase())
}

View file

@ -0,0 +1,62 @@
// Package metadata read from the build environment at compile time; combined by `make_client` into the
// `User-Agent` of the test client.
const CARGO_PKG_NAME: &str = env!("CARGO_PKG_NAME");
const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
// NOTE(review): the `acrate-hostmeta` manifest shown in this change declares no `repository` key, so
// this is presumably an empty string for now — confirm.
const CARGO_PKG_REPOSITORY: &str = env!("CARGO_PKG_REPOSITORY");
// Install a logger for the tests that writes to stdout at the most verbose level.
//
// Initialization is attempted on every call; the confirmation message is only logged by the call that
// actually succeeds in installing the logger.
fn init_log() {
    let installed = pretty_env_logger::formatted_builder()
        .target(pretty_env_logger::env_logger::Target::Stdout)
        .filter_level(log::LevelFilter::max())
        .is_test(true)
        .try_init()
        .is_ok();

    if installed {
        log::debug!("Initialized logging!");
    }
}
// Build the HTTP client shared by the tests, identifying itself with the package name, version and
// repository as its `User-Agent`.
//
// Panics if the client cannot be built.
fn make_client() -> reqwest::Client {
    let builder = reqwest::Client::builder()
        .user_agent(format!("{CARGO_PKG_NAME}/{CARGO_PKG_VERSION} ({CARGO_PKG_REPOSITORY})"));

    builder.build().expect("reqwest client to build")
}
// Generate a module named `$id` containing one async test that runs host-meta discovery against the
// base URL `$url` and expects it to succeed.
//
// Any further arguments are attached to the test function as attributes (for example `ignore = "..."`).
macro_rules! test_discover_hostmeta {
// Two-argument form: forward to the general form with an empty attribute list.
($id:ident, $url:literal) => {
test_discover_hostmeta!($id, $url,);
};
// General form: `$tag` is zero or more comma-separated attribute bodies.
($id:ident, $url:literal, $($tag:meta),*) => {
mod $id {
use acrate_hostmeta::*;
use super::*;
#[tokio::test]
$(#[$tag])*
async fn test() {
init_log();
let client = make_client();
let base: reqwest::Url = $url.parse()
.expect("a valid URL");
let doc = ResourceDescriptor::discover_hostmeta(&client, base)
.await
.expect("host-meta discovery to succeed");
log::info!("Parsed host-meta document: {doc:#?}");
}
}
};
}
// One generated test module per server; each test contacts the listed host over the network.
test_discover_hostmeta!(akkoma, "https://junimo.party");
test_discover_hostmeta!(mastodon, "https://mastodon.social");
test_discover_hostmeta!(misskey, "https://misskey.io");
test_discover_hostmeta!(iceshrimpnet, "https://ice.frieren.quest");
test_discover_hostmeta!(gotosocial, "https://alpha.polymaths.social");
test_discover_hostmeta!(bridgyfed, "https://fed.brid.gy");
// Ignored by default: discovery is expected to fail against these hosts.
test_discover_hostmeta!(threads, "https://threads.net", ignore = "does not support host-meta");
test_discover_hostmeta!(snac, "https://ngoa.giao.loan", ignore = "does not support host-meta");
test_discover_hostmeta!(hollo, "https://hollo.social", ignore = "does not support host-meta");

View file

@ -4,3 +4,16 @@ version = "0.1.0"
edition = "2021"
[dependencies]
acrate-hostmeta = { version = "0.1.0", path = "../acrate-hostmeta" }
log = "0.4.22"
reqwest = { version = "0.12.9", features = ["json", "stream"] }
serde = { version = "1.0.214", features = ["derive"] }
serde_json = "1.0.132"
[dev-dependencies]
pretty_env_logger = "0.5.0"
tokio = { version = "1.41.1", features = ["macros", "rt-multi-thread"] }
tokio-test = "0.4.4"
[lints.clippy]
tabs-in-doc-comments = "allow"

View file

@ -1,14 +1,469 @@
pub fn add(left: u64, right: u64) -> u64 {
left + right
//! Serde-based NodeInfo fetcher and loose parser.
//!
//! > NodeInfo is an effort to create a standardized way of exposing metadata about a server running one of the distributed social networks.
//!
//! # Specification
//!
//! - <https://github.com/jhass/nodeinfo/blob/main/PROTOCOL.md>
use serde::Deserialize;
/// A variant of a NodeInfo document.
///
/// Returned by [`NodeInfo::get_latest_wellknown`], which picks the variant according to the schema
/// advertised for the document it managed to retrieve.
///
/// # Specification
///
/// - <https://github.com/jhass/nodeinfo/blob/main/PROTOCOL.md>
#[derive(Debug, Clone)]
pub enum NodeInfo {
/// A document following one of the 1.X schemas.
V1(NodeInfo1),
/// A document following one of the 2.X schemas.
V2(NodeInfo2),
}
#[cfg(test)]
mod tests {
use super::*;
/// A NodeInfo document at version 1.X.
///
/// Field names are matched in camelCase (for example `openRegistrations`).
///
/// # Specification
///
/// - <https://github.com/jhass/nodeinfo/blob/main/schemas/1.0/schema.json>
/// - <https://github.com/jhass/nodeinfo/blob/main/schemas/1.1/schema.json>
///
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1 {
/// The schema version.
pub version: String,
// NOTE(review): the next four lines appear to be removed-side residue of the diff this text was
// extracted from (the old template test), not part of this struct — confirm against the real file.
#[test]
fn it_works() {
let result = add(2, 2);
assert_eq!(result, 4);
/// Metadata about server software in use.
pub software: NodeInfo1Software,
/// The protocols supported on this server.
pub protocols: NodeInfo1Protocols,
/// The third party sites this server can connect to via their application API.
pub services: NodeInfo1Services,
/// Whether this server allows open self-registration.
pub open_registrations: bool,
/// Usage statistics for this server.
pub usage: NodeInfo1Usage,
/// Free form key value pairs for software specific values.
///
/// Clients should not rely on any specific key present.
pub metadata: serde_json::Value,
}
/// A NodeInfo document at version 2.X.
///
/// Field names are matched in camelCase (for example `openRegistrations`). Compared to
/// [`NodeInfo1`], `protocols` is a flat list, and `instance`, `services` and `usage` may be absent.
///
/// # Specification
///
/// - <https://github.com/jhass/nodeinfo/blob/main/schemas/2.0/schema.json>
/// - <https://github.com/jhass/nodeinfo/blob/main/schemas/2.1/schema.json>
/// - <https://github.com/jhass/nodeinfo/blob/main/schemas/2.2/schema.json>
///
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo2 {
/// The schema version.
pub version: String,
/// Metadata specific to the instance. An instance is a the concrete installation of a software running on a server.
pub instance: Option<NodeInfo2Instance>,
/// Metadata about server software in use.
///
/// Shares its type with [`NodeInfo1::software`].
pub software: NodeInfo1Software,
/// The protocols supported on this server.
pub protocols: Vec<String>,
/// The third party sites this server can connect to via their application API.
pub services: Option<NodeInfo1Services>,
/// Whether this server allows open self-registration.
pub open_registrations: bool,
/// Usage statistics for this server.
pub usage: Option<NodeInfo1Usage>,
/// Free form key value pairs for software specific values.
///
/// Clients should not rely on any specific key present.
pub metadata: serde_json::Value,
}
/// Metadata about the server software in use.
///
/// Used by both [`NodeInfo1`] and [`NodeInfo2`].
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1Software {
/// The canonical name of this server software.
pub name: String,
/// The version of this server software.
pub version: String,
// NOTE(review): presumably the URL of the source code repository of this software — confirm
// against the schema.
pub repository: Option<String>,
// NOTE(review): presumably the URL of the homepage of this software — confirm against the schema.
pub homepage: Option<String>,
}
/// The protocols supported on a server, split by direction.
///
/// Used by [`NodeInfo1`] only; [`NodeInfo2`] stores its protocols as a flat list.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1Protocols {
/// The protocols this server can receive traffic for.
pub inbound: Vec<String>,
/// The protocols this server can generate traffic for.
pub outbound: Vec<String>,
}
/// The third party sites a server can connect to, split by direction.
///
/// Used by both [`NodeInfo1`] and [`NodeInfo2`].
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1Services {
/// The third party sites this server can retrieve messages from for combined display with regular traffic.
pub inbound: Vec<String>,
/// The third party sites this server can publish messages to on the behalf of a user.
pub outbound: Vec<String>,
}
/// Usage statistics for a server.
///
/// Every statistic is optional. Used by both [`NodeInfo1`] and [`NodeInfo2`].
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1Usage {
/// Statistics about the users of this server.
pub users: Option<NodeInfo1UsageUsers>,
/// The amount of posts that were made by users that are registered on this server.
pub local_posts: Option<i32>,
/// The amount of comments that were made by users that are registered on this server.
pub local_comments: Option<i32>,
}
/// Statistics about the users of a server.
///
/// Every statistic is optional. Field names are matched in camelCase (for example `activeHalfyear`).
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo1UsageUsers {
/// The total amount of on this server registered users.
pub total: Option<i32>,
/// The amount of users that signed in at least once in the last 180 days.
pub active_halfyear: Option<i32>,
/// The amount of users that signed in at least once in the last 30 days.
pub active_month: Option<i32>,
/// The amount of users that signed in at least once in the last 7 days.
pub active_week: Option<i32>,
}
/// Metadata specific to an instance, as found in [`NodeInfo2::instance`].
// NOTE(review): both fields are documented as "if supported by the software" yet are mandatory here,
// so an `instance` object lacking either one fails deserialization — confirm this is intended.
#[derive(Debug, Clone, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct NodeInfo2Instance {
/// If supported by the software, the administrator-configured name of this instance.
pub name: String,
/// If supported by the software, the administrator-configured long form description of this instance.
pub description: String,
}
impl NodeInfo {
/// Well-known path for NodeInfo documents.
///
/// [`Self::get_latest_wellknown`] sets this as the path of the base URL it is given.
///
/// # Specification
///
/// - <https://github.com/jhass/nodeinfo/blob/main/PROTOCOL.md#discovery>
pub const WELLKNOWN_NODEINFO_PATH: &str = "/.well-known/nodeinfo";
/// Discover and get the latest NodeInfo version available given a certain base URL.
///
/// The path of `base` is replaced with [`Self::WELLKNOWN_NODEINFO_PATH`] and the resource descriptor
/// found there is used as the list of candidate documents. Candidates are tried in descending order of
/// their `rel`, so that the highest schema version comes first; the first document that is retrieved
/// successfully is returned. Links without an `href`, with an unparseable `href`, with an unknown
/// `rel`, or whose document cannot be retrieved are skipped with a warning.
///
/// # Errors
///
/// - [`NodeInfoGetWellknownError::Discovery`] if the well-known resource descriptor cannot be
///   discovered.
/// - [`NodeInfoGetWellknownError::Unsupported`] if no link yields a NodeInfo document.
///
/// # Examples
///
/// ```
/// # tokio_test::block_on(async {
/// use acrate_nodeinfo::NodeInfo;
///
/// let client = reqwest::Client::new();
/// let url: reqwest::Url = "https://mastodon.online".parse()
/// 	.expect("URL to be valid");
///
/// let ni = NodeInfo::get_latest_wellknown(&client, url)
/// 	.await
/// 	.expect("NodeInfo to be processed correctly");
///
/// let version = match ni {
/// 	NodeInfo::V1(ni) => ni.version,
/// 	NodeInfo::V2(ni) => ni.version,
/// };
///
/// assert_eq!(version, "2.0");
/// # })
/// ```
pub async fn get_latest_wellknown(client: &reqwest::Client, mut base: reqwest::Url) -> Result<Self, NodeInfoGetWellknownError> {
    use NodeInfoGetWellknownError::*;

    // Prefix shared by the `rel` of every supported schema link; the schema version follows it.
    const SCHEMA_REL_PREFIX: &str = "http://nodeinfo.diaspora.software/ns/schema/";

    log::debug!("Getting well-known NodeInfo document at base: {base}");

    log::trace!("Setting URL path to the well-known NodeInfo value...");
    base.set_path(Self::WELLKNOWN_NODEINFO_PATH);

    log::trace!("Discovering NodeInfo document locations...");
    let discovery = acrate_hostmeta::ResourceDescriptor::discover(client, base)
        .await
        .map_err(Discovery)?;

    log::trace!("Getting a list of NodeInfo document links...");
    let mut links = discovery.links.unwrap_or_default();
    // Descending order on `rel`, compared in place so that no `rel` has to be cloned.
    links.sort_unstable_by(|a, b| b.rel.cmp(&a.rel));

    for link in links {
        log::trace!("Checking discovered link href...");
        let Some(href) = link.href else {
            log::warn!("Discovered link does not have an href, skipping...");
            continue;
        };
        log::trace!("Discovered link has an href, processing...");

        log::trace!("Parsing discovered link href...");
        let url: reqwest::Url = match href.parse() {
            Ok(url) => url,
            Err(e) => {
                log::warn!("Discovered link has an invalid URL as href, skipping: {e:#?}");
                continue;
            }
        };
        log::trace!("Discovered link has a valid URL, processing...");

        let rel = link.rel;
        // The schema version selects both the parser to use and the variant to wrap the result in.
        let (version, outcome) = match rel.strip_prefix(SCHEMA_REL_PREFIX) {
            Some(version @ ("1.0" | "1.1")) => {
                (version, NodeInfo1::get(client, url).await.map(Self::V1))
            }
            Some(version @ ("2.0" | "2.1" | "2.2")) => {
                (version, NodeInfo2::get(client, url).await.map(Self::V2))
            }
            _ => {
                log::warn!("Discovered link has unknown rel `{rel}`, skipping.");
                continue;
            }
        };

        match outcome {
            Ok(nodeinfo) => {
                log::trace!("Successfully processed NodeInfo v{version} document!");
                log::trace!("Successfully retrieved latest NodeInfo: {nodeinfo:#?}");
                return Ok(nodeinfo);
            }
            Err(e) => {
                log::warn!("Failed to get NodeInfo v{version} document, skipping: {e:#?}");
            }
        }
    }

    log::warn!("Ran out of possible NodeInfo sources, returning an Unsupported error.");
    Err(Unsupported)
}
}
/// An error occurred during [`NodeInfo::get_latest_wellknown`].
#[derive(Debug)]
pub enum NodeInfoGetWellknownError {
/// The discovery of possible locations for NodeInfo documents failed.
///
/// Carries the error returned by the resource descriptor discovery.
Discovery(acrate_hostmeta::ResourceDescriptorDiscoveryError),
/// No compatible NodeInfo documents were detected at the given URL.
///
/// Returned once every discovered link has been skipped or has failed to be retrieved.
Unsupported,
}
impl NodeInfo1 {
/// Get a NodeInfo v1.X document.
pub async fn get(client: &reqwest::Client, url: reqwest::Url) -> Result<Self, NodeInfoGetError> {
use NodeInfoGetError::*;
log::debug!("Getting NodeInfo v1.X document at: {url}");
log::trace!("Building request...");
let request = {
log::trace!("Creating new request...");
let mut request = reqwest::Request::new(reqwest::Method::GET, url);
log::trace!("Setting request headers...");
let headers = request.headers_mut();
log::trace!("Setting `Accept: application/json`...");
let _ = headers.insert(reqwest::header::ACCEPT, "application/json".parse().unwrap());
request
};
log::trace!("Sending request...");
let response = client.execute(request)
.await
.map_err(Request)?;
log::trace!("Checking `Content-Type` of the response...");
let content_type = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.ok_or(ContentTypeMissing)?;
log::trace!("Extracting MIME type from the `Content-Type` header...");
let mime_type = extract_mime_from_content_type(content_type)
.ok_or(ContentTypeInvalid)?;
log::trace!("Ensuring MIME type is acceptable for NodeInfo documents...");
if mime_type != "application/json" {
log::error!("MIME type `{mime_type}` is not acceptable for NodeInfo documents.");
return Err(ContentTypeInvalid)
}
log::trace!("Attempting to parse response as JSON...");
let data = response.json::<Self>()
.await
.map_err(Parse)?;
log::trace!("Making sure version is compatible with 1.X...");
if !data.version.starts_with("1.") {
return Err(Version);
}
Ok(data)
}
}
impl NodeInfo2 {
/// Get a NodeInfo v2.X document.
///
/// # Examples
///
/// ```
/// # tokio_test::block_on(async {
/// use acrate_nodeinfo::NodeInfo2;
///
/// let client = reqwest::Client::new();
/// let url: reqwest::Url = "https://junimo.party/nodeinfo/2.1.json".parse()
/// .expect("URL to be valid");
///
/// let rd = NodeInfo2::get(&client, url)
/// .await
/// .expect("NodeInfo to be obtained correctly");
/// # })
/// ```
pub async fn get(client: &reqwest::Client, url: reqwest::Url) -> Result<Self, NodeInfoGetError> {
use NodeInfoGetError::*;
log::debug!("Getting NodeInfo v2.X document at: {url}");
log::trace!("Building request...");
let request = {
log::trace!("Creating new request...");
let mut request = reqwest::Request::new(reqwest::Method::GET, url);
log::trace!("Setting request headers...");
let headers = request.headers_mut();
log::trace!("Setting `Accept: application/json`...");
let _ = headers.insert(reqwest::header::ACCEPT, "application/json".parse().unwrap());
request
};
log::trace!("Sending request...");
let response = client.execute(request)
.await
.map_err(Request)?;
log::trace!("Checking `Content-Type` of the response...");
let content_type = response
.headers()
.get(reqwest::header::CONTENT_TYPE)
.ok_or(ContentTypeMissing)?;
log::trace!("Extracting MIME type from the `Content-Type` header...");
let mime_type = extract_mime_from_content_type(content_type)
.ok_or(ContentTypeInvalid)?;
log::trace!("Ensuring MIME type is acceptable for NodeInfo documents...");
if mime_type != "application/json" {
log::error!("MIME type `{mime_type}` is not acceptable for NodeInfo documents.");
return Err(ContentTypeInvalid)
}
log::trace!("Attempting to parse response as JSON...");
let data = response.json::<Self>()
.await
.map_err(Parse)?;
log::trace!("Making sure version is compatible with 2.X...");
if !data.version.starts_with("2.") {
return Err(Version)
}
Ok(data)
}
}
/// An error encountered during [`NodeInfo1::get`] or [`NodeInfo2::get`].
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`]; the wrapped [`reqwest::Error`]
/// of the [`Request`](Self::Request) and [`Parse`](Self::Parse) variants is exposed through
/// [`std::error::Error::source`].
#[derive(Debug)]
pub enum NodeInfoGetError {
    /// The HTTP request failed.
    Request(reqwest::Error),
    /// The `Content-Type` header of the response is missing.
    ContentTypeMissing,
    /// The `Content-Type` header of the response is invalid.
    ContentTypeInvalid,
    /// The document failed to be parsed as JSON by [`reqwest`].
    Parse(reqwest::Error),
    /// The returned version does not match the version of the created struct.
    Version,
}

impl std::fmt::Display for NodeInfoGetError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The underlying `reqwest::Error`, where present, is reported via `source()` rather
        // than being repeated in this message.
        let message = match self {
            Self::Request(_) => "the HTTP request failed",
            Self::ContentTypeMissing => "the `Content-Type` header of the response is missing",
            Self::ContentTypeInvalid => "the `Content-Type` header of the response is invalid",
            Self::Parse(_) => "the document failed to be parsed as JSON",
            Self::Version => "the returned version does not match the requested NodeInfo version",
        };
        f.write_str(message)
    }
}

impl std::error::Error for NodeInfoGetError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Self::Request(cause) | Self::Parse(cause) => Some(cause),
            Self::ContentTypeMissing | Self::ContentTypeInvalid | Self::Version => None,
        }
    }
}
/// Extract the MIME type from the value of the `Content-Type` header.
///
/// Everything from the first `;` onwards (the media type parameters, such as `charset`) is
/// discarded, surrounding whitespace is trimmed, and the result is lowercased so that callers
/// can compare it against lowercase literals such as `application/json`.
///
/// Returns [`None`] if the header value cannot be represented as a string by
/// [`reqwest::header::HeaderValue::to_str`].
fn extract_mime_from_content_type(value: &reqwest::header::HeaderValue) -> Option<String> {
    let value = value.to_str().ok()?;

    // Parameters are separated by `;` with optional whitespace on either side, so split on the
    // bare semicolon instead of requiring the exact sequence `"; "`.
    let mime = value
        .split_once(';')
        .map_or(value, |(mime, _parameters)| mime)
        .trim();

    // Media type and subtype are case-insensitive; normalize so `Application/JSON` is accepted.
    Some(mime.to_ascii_lowercase())
}

View file

@ -0,0 +1,70 @@
// Package metadata read from the build environment at compile time via `env!`.
// `make_client` combines these three values into the `User-Agent` string of the test client.
const CARGO_PKG_NAME: &str = env!("CARGO_PKG_NAME");
const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
const CARGO_PKG_REPOSITORY: &str = env!("CARGO_PKG_REPOSITORY");
/// Set up logging for a test: pretty-printed output to stdout, at the most verbose level,
/// with the logger flagged as running inside a test.
///
/// Initialization failures are ignored, which lets every test call this unconditionally.
fn init_log() {
    let outcome = pretty_env_logger::formatted_builder()
        .target(pretty_env_logger::env_logger::Target::Stdout)
        .filter_level(log::LevelFilter::max())
        .is_test(true)
        .try_init();

    // NOTE(review): a failure here presumably means a logger was already installed by another
    // test in the same process — confirm against the `env_logger` documentation.
    if outcome.is_err() {
        return;
    }

    log::debug!("Initialized logging!");
}
/// Build the HTTP client shared by the tests.
///
/// The client identifies itself with a `User-Agent` of the form
/// `<package name>/<package version> (<package repository>)`.
///
/// # Panics
///
/// Panics if the client cannot be built.
fn make_client() -> reqwest::Client {
    let builder = reqwest::Client::builder().user_agent(format!(
        "{CARGO_PKG_NAME}/{CARGO_PKG_VERSION} ({CARGO_PKG_REPOSITORY})"
    ));

    builder.build().expect("reqwest client to build")
}
// Generates a module named `$id` containing a single async test, `test_version`.
// The test runs `NodeInfo::get_latest_wellknown` against the base URL `$url` and asserts that
// the `version` field of the returned document equals `$version`.
// Any further arguments are applied as attributes on the generated test function.
macro_rules! test {
// Short form without extra attributes: forwards to the full form with an empty attribute list
// (note the trailing comma, which the full form's matcher requires).
($id:ident, $url:literal, $version:literal) => {
test!($id, $url, $version,);
};
// Full form: zero or more comma-separated `$tag` attributes follow the version.
($id:ident, $url:literal, $version:literal, $($tag:meta),*) => {
// Each test lives in its own module so that every expansion can reuse the name `test_version`.
mod $id {
use acrate_nodeinfo::*;
use super::*;
#[tokio::test]
// Expands every `$tag` into its own attribute, e.g. `ignore = "..."` becomes `#[ignore = "..."]`.
$(#[$tag])*
async fn test_version() {
init_log();
let client = make_client();
let base: reqwest::Url = $url.parse()
.expect("a valid URL");
let doc = NodeInfo::get_latest_wellknown(&client, base)
.await
.expect("NodeInfo discovery to succeed");
log::info!("Parsed NodeInfo document: {doc:#?}");
// Both document variants carry a `version` field; extract it regardless of the variant.
let version = match doc {
NodeInfo::V1(d) => d.version,
NodeInfo::V2(d) => d.version,
};
assert_eq!(version, $version);
}
}
};
}
// One generated test module per instance: module name, base URL of the instance, and the
// NodeInfo version the discovered document is expected to report.
// The `threads` test is marked `ignore` with the reason given in its attribute.
test!(akkoma, "https://junimo.party", "2.1");
test!(mastodon, "https://mastodon.social", "2.0");
test!(misskey, "https://misskey.io", "2.1");
test!(iceshrimpnet, "https://ice.frieren.quest", "2.1");
test!(gotosocial, "https://alpha.polymaths.social", "2.0");
test!(bridgyfed, "https://fed.brid.gy", "2.1");
test!(threads, "https://threads.net", "", ignore = "does not support NodeInfo");
test!(snac, "https://ngoa.giao.loan", "2.0");
test!(hollo, "https://hollo.social", "2.1");