//! Wikidata SPARQL enrichment — artist QID, MusicBrainz ID, label, genres. use serde::{Deserialize, Serialize}; use tracing::{info, warn}; const SPARQL: &str = "https://query.wikidata.org/sparql"; const UA: &str = "RetrosyncMediaGroup/1.0 (https://retrosync.media)"; #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct WikidataArtist { pub qid: Option, pub wikidata_url: Option, pub musicbrainz_id: Option, pub label_name: Option, pub label_qid: Option, pub country: Option, pub genres: Vec, pub website: Option, pub known_isrcs: Vec, } #[derive(Deserialize)] struct SparqlResp { results: SparqlResults, } #[derive(Deserialize)] struct SparqlResults { bindings: Vec, } pub async fn lookup_artist(name: &str) -> WikidataArtist { match lookup_inner(name).await { Ok(a) => a, Err(e) => { warn!(artist=%name, err=%e, "Wikidata failed"); WikidataArtist::default() } } } async fn lookup_inner(name: &str) -> anyhow::Result { let safe = name.replace('"', "\\\""); let query = format!( r#" SELECT DISTINCT ?artist ?mbid ?label ?labelLabel ?country ?countryLabel ?genre ?genreLabel ?website ?isrc WHERE {{ ?artist rdfs:label "{safe}"@en . {{ ?artist wdt:P31/wdt:P279* wd:Q5 }} UNION {{ ?artist wdt:P31 wd:Q215380 }} OPTIONAL {{ ?artist wdt:P434 ?mbid }} OPTIONAL {{ ?artist wdt:P264 ?label }} OPTIONAL {{ ?artist wdt:P27 ?country }} OPTIONAL {{ ?artist wdt:P136 ?genre }} OPTIONAL {{ ?artist wdt:P856 ?website }} OPTIONAL {{ ?artist wdt:P1243 ?isrc }} SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" }} }} LIMIT 20"# ); let client = reqwest::Client::builder() .user_agent(UA) .timeout(std::time::Duration::from_secs(10)) .build()?; let resp = client .get(SPARQL) .query(&[("query", &query), ("format", &"json".to_string())]) .send() .await? .json::() .await?; let b = &resp.results.bindings; if b.is_empty() { return Ok(WikidataArtist::default()); } let ext = |key: &str| -> Option { b[0][key]["value"].as_str().map(|s| s.into()) }; let qid = ext("artist") .as_ref() .and_then(|u| u.rsplit('/').next().map(|s| s.into())); let wikidata_url = qid .as_ref() .map(|q| format!("https://www.wikidata.org/wiki/{q}")); let mut genres = Vec::new(); let mut known_isrcs = Vec::new(); for row in b { if let Some(g) = row["genreLabel"]["value"].as_str() { let g = g.to_string(); if !genres.contains(&g) { genres.push(g); } } if let Some(i) = row["isrc"]["value"].as_str() { let i = i.to_string(); if !known_isrcs.contains(&i) { known_isrcs.push(i); } } } let a = WikidataArtist { qid, wikidata_url, musicbrainz_id: ext("mbid"), label_name: ext("labelLabel"), label_qid: ext("label").and_then(|u| u.rsplit('/').next().map(|s| s.into())), country: ext("countryLabel"), genres, website: ext("website"), known_isrcs, }; info!(artist=%name, qid=?a.qid, "Wikidata enriched"); Ok(a) } pub async fn isrc_exists(isrc: &str) -> bool { let query = format!( r#"ASK {{ ?item wdt:P1243 "{}" }}"#, isrc.replace('"', "\\\"") ); #[derive(Deserialize)] struct AskResp { boolean: bool, } let client = reqwest::Client::builder() .user_agent(UA) .timeout(std::time::Duration::from_secs(5)) .build() .unwrap_or_default(); match client .get(SPARQL) .query(&[("query", &query), ("format", &"json".to_string())]) .send() .await { Ok(r) => r .json::() .await .map(|a| a.boolean) .unwrap_or(false), Err(_) => false, } }