//! Module to interact with the Minetest Content DB website. use once_cell::sync::Lazy; use scraper::{Html, Selector}; use serde::{Deserialize, Serialize}; use url::Url; use super::error::{Error, Result}; /// The identification of content on Content DB. Consists of the username and the package name. pub type ContentId = (String, String); /// The URL of the default Content DB website to use. pub static DEFAULT_INSTANCE: Lazy = Lazy::new(|| Url::parse("https://content.minetest.net/").expect("Invalid default URL")); /// The metapackage selector to scrape the packages. static PROVIDES_SELECTOR: Lazy = Lazy::new(|| Selector::parse("ul.d-flex").expect("Invalid selector")); static A_SELECTOR: Lazy = Lazy::new(|| Selector::parse("a").expect("Invalid selector")); /// (Partial) metadata of a content item, as returned by the Content DB. #[derive(Debug, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)] pub struct ContentMeta { pub author: String, pub name: String, pub provides: Vec, pub short_description: String, pub title: String, #[serde(rename = "type")] pub typ: String, pub url: Url, } /// The main access point for Content DB queries. #[derive(Debug, Clone)] pub struct ContentDb { base_url: Url, } impl Default for ContentDb { fn default() -> Self { Self::new() } } impl ContentDb { /// Create a new Content DB accessor pointing to the default instance. pub fn new() -> ContentDb { ContentDb { base_url: DEFAULT_INSTANCE.clone(), } } /// Find suitable candidates that provide the given modname. pub fn resolve(&self, modname: &str) -> Result> { let path = format!("metapackages/{}", modname); let endpoint = self .base_url .join(&path) .map_err(|_| Error::InvalidModId(modname.into()))?; let body = ureq::request_url("GET", &endpoint).call()?.into_string()?; let dom = Html::parse_document(&body); let provides = dom .select(&PROVIDES_SELECTOR) .next() .ok_or(Error::InvalidScrape)?; let candidates: Vec = provides .select(&A_SELECTOR) .filter_map(|a| a.value().attr("href")) .filter_map(extract_content_id) .collect(); let mut good_ones = Vec::new(); for (user, package) in candidates { let path = format!("api/packages/{}/{}/", user, package); let endpoint = self .base_url .join(&path) .expect("The parsed path was wrong"); let response: ContentMeta = ureq::request_url("GET", &endpoint).call()?.into_json()?; // While resolving, we only care about actual mods that we can install. If a game // provides a certain metapackage, it is pretty much useless for us (and often just // there because a mod in that game provides the metapackage). if response.typ == "mod" { good_ones.push(response) } } Ok(good_ones) } /// Retrieve the download url for a given package. pub fn download_url(&self, user: &str, package: &str) -> Result { let path = format!("api/packages/{}/{}/", user, package); let endpoint = self .base_url .join(&path) .expect("The parsed path was wrong"); let response: ContentMeta = ureq::request_url("GET", &endpoint).call()?.into_json()?; Ok(response.url) } } fn extract_content_id(path: &str) -> Option { regex!("/packages/([^/]+)/([^/]+)/$") .captures(path) .map(|c| { ( c.get(1).unwrap().as_str().into(), c.get(2).unwrap().as_str().into(), ) }) }