//! Wikidata SPARQL enrichment — artist QID, MusicBrainz ID, label, genres.
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
/// URL that every query in this module is sent to with an HTTP GET.
const SPARQL: &str = "https://query.wikidata.org/sparql";
/// `User-Agent` header value set on every HTTP client built in this module.
const UA: &str = "RetrosyncMediaGroup/1.0 (https://retrosync.media)";
/// Artist metadata gathered from a Wikidata SPARQL lookup.
///
/// Every field is optional or may be empty; `Default` yields the "nothing
/// found" value that `lookup_artist` returns when the lookup fails.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct WikidataArtist {
/// Last path segment of the `?artist` entity URI (e.g. `Q…`).
pub qid: Option<String>,
/// `https://www.wikidata.org/wiki/{qid}`, present whenever `qid` is.
pub wikidata_url: Option<String>,
/// Value bound to `?mbid` (property `wdt:P434`).
pub musicbrainz_id: Option<String>,
/// English label of the entity bound to `?label` (property `wdt:P264`).
pub label_name: Option<String>,
/// Last path segment of the `?label` entity URI.
pub label_qid: Option<String>,
/// English label of the entity bound to `?country` (property `wdt:P27`).
pub country: Option<String>,
/// De-duplicated `?genreLabel` values collected from the result rows,
/// in first-seen order.
pub genres: Vec<String>,
/// Value bound to `?website` (property `wdt:P856`).
pub website: Option<String>,
/// De-duplicated `?isrc` values (property `wdt:P1243`) collected from the
/// result rows, in first-seen order.
pub known_isrcs: Vec<String>,
}
/// Top-level shape of the JSON body that `lookup_inner` deserializes from the
/// SPARQL endpoint; only the `results` member is read.
#[derive(Deserialize)]
struct SparqlResp {
// Container holding the result rows.
results: SparqlResults,
}
/// The `results` member of a SPARQL JSON response.
#[derive(Deserialize)]
struct SparqlResults {
// One entry per result row. Rows are kept as untyped JSON and read by
// `row[variable]["value"]`, so variables missing from a row simply yield
// no string.
bindings: Vec<serde_json::Value>,
}
/// Looks up `name` on Wikidata and returns whatever metadata was found.
///
/// This is the infallible wrapper around `lookup_inner`: any error is logged
/// at `warn` level and replaced by an empty [`WikidataArtist`].
pub async fn lookup_artist(name: &str) -> WikidataArtist {
    lookup_inner(name).await.unwrap_or_else(|failure| {
        warn!(artist=%name, err=%failure, "Wikidata failed");
        WikidataArtist::default()
    })
}
/// Queries the Wikidata SPARQL endpoint for an entity whose English label is
/// exactly `name` and that is either an instance of (a subclass of) `wd:Q5`
/// or an instance of `wd:Q215380`.
///
/// Returns an empty [`WikidataArtist`] when the query yields no rows.
///
/// # Errors
///
/// Fails if the HTTP client cannot be built, the request fails or times out
/// (10 s), the endpoint answers with a non-success status, or the body is not
/// the expected SPARQL JSON.
async fn lookup_inner(name: &str) -> anyhow::Result<WikidataArtist> {
    // Escapes `raw` for embedding in a double-quoted SPARQL string literal.
    // The backslash must be handled as well as the quote: otherwise a name
    // ending in `\` would swallow the closing quote. Raw line breaks are not
    // permitted inside a `"…"` literal either.
    fn escape_literal(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len());
        for ch in raw.chars() {
            match ch {
                '\\' => out.push_str("\\\\"),
                '"' => out.push_str("\\\""),
                '\n' => out.push_str("\\n"),
                '\r' => out.push_str("\\r"),
                other => out.push(other),
            }
        }
        out
    }

    let safe = escape_literal(name);
    let query = format!(
        r#"
SELECT DISTINCT ?artist ?mbid ?label ?labelLabel ?country ?countryLabel ?genre ?genreLabel ?website ?isrc
WHERE {{
?artist rdfs:label "{safe}"@en .
{{ ?artist wdt:P31/wdt:P279* wd:Q5 }} UNION {{ ?artist wdt:P31 wd:Q215380 }}
OPTIONAL {{ ?artist wdt:P434 ?mbid }}
OPTIONAL {{ ?artist wdt:P264 ?label }}
OPTIONAL {{ ?artist wdt:P27 ?country }}
OPTIONAL {{ ?artist wdt:P136 ?genre }}
OPTIONAL {{ ?artist wdt:P856 ?website }}
OPTIONAL {{ ?artist wdt:P1243 ?isrc }}
SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" }}
}} LIMIT 20"#
    );

    let client = reqwest::Client::builder()
        .user_agent(UA)
        .timeout(std::time::Duration::from_secs(10))
        .build()?;
    let resp = client
        .get(SPARQL)
        .query(&[("query", query.as_str()), ("format", "json")])
        .send()
        .await?
        // Surface throttling / server errors as an HTTP error instead of an
        // opaque JSON decode failure.
        .error_for_status()?
        .json::<SparqlResp>()
        .await?;

    let rows = &resp.results.bindings;
    let first = match rows.first() {
        Some(row) => row,
        None => return Ok(WikidataArtist::default()),
    };

    // Scalar fields are taken from the first row.
    let field = |key: &str| -> Option<String> { first[key]["value"].as_str().map(str::to_owned) };
    // The QID is the last path segment of the entity URI.
    let tail = |uri: String| -> Option<String> { uri.rsplit('/').next().map(str::to_owned) };

    let artist_uri = first["artist"]["value"].as_str();
    let qid = field("artist").and_then(tail);
    let wikidata_url = qid
        .as_ref()
        .map(|q| format!("https://www.wikidata.org/wiki/{q}"));

    // Several entities can share the same English label. Only aggregate rows
    // that describe the same entity as the first row, so genres and ISRCs of
    // unrelated namesakes are not mixed into this artist.
    let mut genres: Vec<String> = Vec::new();
    let mut known_isrcs: Vec<String> = Vec::new();
    for row in rows
        .iter()
        .filter(|row| row["artist"]["value"].as_str() == artist_uri)
    {
        if let Some(genre) = row["genreLabel"]["value"].as_str() {
            if !genres.iter().any(|seen| seen == genre) {
                genres.push(genre.to_owned());
            }
        }
        if let Some(isrc) = row["isrc"]["value"].as_str() {
            if !known_isrcs.iter().any(|seen| seen == isrc) {
                known_isrcs.push(isrc.to_owned());
            }
        }
    }

    let a = WikidataArtist {
        qid,
        wikidata_url,
        musicbrainz_id: field("mbid"),
        label_name: field("labelLabel"),
        label_qid: field("label").and_then(tail),
        country: field("countryLabel"),
        genres,
        website: field("website"),
        known_isrcs,
    };
    info!(artist=%name, qid=?a.qid, "Wikidata enriched");
    Ok(a)
}
pub async fn isrc_exists(isrc: &str) -> bool {
let query = format!(
r#"ASK {{ ?item wdt:P1243 "{}" }}"#,
isrc.replace('"', "\\\"")
);
#[derive(Deserialize)]
struct AskResp {
boolean: bool,
}
let client = reqwest::Client::builder()
.user_agent(UA)
.timeout(std::time::Duration::from_secs(5))
.build()
.unwrap_or_default();
match client
.get(SPARQL)
.query(&[("query", &query), ("format", &"json".to_string())])
.send()
.await
{
Ok(r) => r
.json::<AskResp>()
.await
.map(|a| a.boolean)
.unwrap_or(false),
Err(_) => false,
}
}
|