File size: 4,164 Bytes
1295969
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
//! Wikidata SPARQL enrichment — artist QID, MusicBrainz ID, label, genres.
use serde::{Deserialize, Serialize};
use tracing::{info, warn};

/// URL of the Wikidata SPARQL endpoint that every query in this module is sent to (via GET).
const SPARQL: &str = "https://query.wikidata.org/sparql";
/// `User-Agent` value set on every HTTP client built in this module.
const UA: &str = "RetrosyncMediaGroup/1.0 (https://retrosync.media)";

/// Artist metadata gathered from Wikidata by [`lookup_artist`].
///
/// The `Default` value (every `Option` is `None`, every `Vec` is empty) is what callers receive
/// when the lookup fails or the query returns no rows.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct WikidataArtist {
    /// Wikidata entity ID: the last path segment of the matched `?artist` URI.
    pub qid: Option<String>,
    /// `https://www.wikidata.org/wiki/{qid}`; present exactly when `qid` is.
    pub wikidata_url: Option<String>,
    /// MusicBrainz ID, read from Wikidata property `P434`.
    pub musicbrainz_id: Option<String>,
    /// English label of the entity linked through property `P264` (the artist's label).
    pub label_name: Option<String>,
    /// Wikidata entity ID of the `P264` value (last path segment of its URI).
    pub label_qid: Option<String>,
    /// English label of the entity linked through property `P27`.
    pub country: Option<String>,
    /// Distinct English labels of the `P136` values, in the order the result rows list them.
    pub genres: Vec<String>,
    /// Value of property `P856`.
    pub website: Option<String>,
    /// Distinct values of property `P1243` (ISRC), in the order the result rows list them.
    pub known_isrcs: Vec<String>,
}

/// Top-level object of the SPARQL JSON response for a `SELECT` query.
///
/// Only the `results` member is deserialized; any other members are ignored.
#[derive(Deserialize)]
struct SparqlResp {
    /// Container holding the result rows.
    results: SparqlResults,
}
/// The `results` member of a SPARQL JSON response.
#[derive(Deserialize)]
struct SparqlResults {
    /// One entry per result row. Rows are kept as untyped JSON; the lookup code reads each
    /// variable as `row["<variable>"]["value"]`.
    bindings: Vec<serde_json::Value>,
}

/// Looks up the artist called `name` on Wikidata without ever returning an error.
///
/// Delegates to `lookup_inner`. If that fails, the error is logged at `warn` level together
/// with the artist name, and an empty (`Default`) [`WikidataArtist`] is returned instead.
pub async fn lookup_artist(name: &str) -> WikidataArtist {
    lookup_inner(name).await.unwrap_or_else(|e| {
        warn!(artist=%name, err=%e, "Wikidata failed");
        WikidataArtist::default()
    })
}

/// Escapes `raw` so it can be embedded inside a double-quoted SPARQL string literal.
///
/// The backslash must be escaped as well as the quote: otherwise an input ending in `\`, or
/// containing `\"`, would terminate the literal early and let the rest of the input be parsed
/// as query text. Raw line breaks are not allowed in a short string literal, so they are
/// escaped too.
fn escape_sparql_literal(raw: &str) -> String {
    let mut out = String::with_capacity(raw.len());
    for ch in raw.chars() {
        match ch {
            '\\' => out.push_str("\\\\"),
            '"' => out.push_str("\\\""),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            other => out.push(other),
        }
    }
    out
}

/// Queries Wikidata for an entity whose English label is exactly `name` and that is either an
/// instance of (a subclass of) `Q5` or an instance of `Q215380`.
///
/// Returns `WikidataArtist::default()` when the query yields no rows. Otherwise the first row
/// decides which entity is reported: scalar fields come from that row, while genres and ISRCs
/// are collected (de-duplicated, in row order) from every row that refers to the same entity.
///
/// # Errors
///
/// Fails if the HTTP client cannot be built, the request fails or times out (10 s), the server
/// answers with a non-success status, or the body is not the expected SPARQL JSON.
async fn lookup_inner(name: &str) -> anyhow::Result<WikidataArtist> {
    let safe = escape_sparql_literal(name);
    let query = format!(
        r#"
SELECT DISTINCT ?artist ?mbid ?label ?labelLabel ?country ?countryLabel ?genre ?genreLabel ?website ?isrc
WHERE {{
  ?artist rdfs:label "{safe}"@en .
  {{ ?artist wdt:P31/wdt:P279* wd:Q5 }} UNION {{ ?artist wdt:P31 wd:Q215380 }}
  OPTIONAL {{ ?artist wdt:P434 ?mbid }}
  OPTIONAL {{ ?artist wdt:P264 ?label }}
  OPTIONAL {{ ?artist wdt:P27  ?country }}
  OPTIONAL {{ ?artist wdt:P136 ?genre }}
  OPTIONAL {{ ?artist wdt:P856 ?website }}
  OPTIONAL {{ ?artist wdt:P1243 ?isrc }}
  SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en" }}
}} LIMIT 20"#
    );

    let client = reqwest::Client::builder()
        .user_agent(UA)
        .timeout(std::time::Duration::from_secs(10))
        .build()?;
    let resp = client
        .get(SPARQL)
        .query(&[("query", query.as_str()), ("format", "json")])
        .send()
        .await?
        // Surface 4xx/5xx (e.g. rate limiting) as an HTTP error rather than a JSON decode error.
        .error_for_status()?
        .json::<SparqlResp>()
        .await?;

    let rows = &resp.results.bindings;
    let first = match rows.first() {
        Some(row) => row,
        None => return Ok(WikidataArtist::default()),
    };

    // Indexing a `serde_json::Value` with a missing key yields `Null`, so absent (unbound)
    // variables simply come back as `None`.
    let ext = |key: &str| -> Option<String> { first[key]["value"].as_str().map(str::to_owned) };
    // Entity URIs end in the entity ID; keep only that last path segment.
    let last_segment = |uri: &str| -> Option<String> { uri.rsplit('/').next().map(str::to_owned) };

    let qid = ext("artist").as_deref().and_then(last_segment);
    let wikidata_url = qid
        .as_ref()
        .map(|q| format!("https://www.wikidata.org/wiki/{q}"));

    // Several entities can share the same English label. Only aggregate rows that belong to
    // the entity reported in `qid`, so another artist's genres/ISRCs are not mixed in.
    let artist_uri = first["artist"]["value"].as_str();
    let mut genres: Vec<String> = Vec::new();
    let mut known_isrcs: Vec<String> = Vec::new();
    for row in rows
        .iter()
        .filter(|row| row["artist"]["value"].as_str() == artist_uri)
    {
        if let Some(g) = row["genreLabel"]["value"].as_str() {
            if !genres.iter().any(|seen| seen == g) {
                genres.push(g.to_owned());
            }
        }
        if let Some(i) = row["isrc"]["value"].as_str() {
            if !known_isrcs.iter().any(|seen| seen == i) {
                known_isrcs.push(i.to_owned());
            }
        }
    }

    let a = WikidataArtist {
        qid,
        wikidata_url,
        musicbrainz_id: ext("mbid"),
        label_name: ext("labelLabel"),
        label_qid: ext("label").as_deref().and_then(last_segment),
        country: ext("countryLabel"),
        genres,
        website: ext("website"),
        known_isrcs,
    };
    info!(artist=%name, qid=?a.qid, "Wikidata enriched");
    Ok(a)
}

/// Asks Wikidata whether any item carries `isrc` as a value of property `P1243`.
///
/// This is a best-effort check: it returns `false` not only when Wikidata answers "no", but
/// also when the HTTP client cannot be built, the request fails or times out (5 s), the server
/// answers with a non-success status, or the body is not the expected `{"boolean": ...}` JSON.
pub async fn isrc_exists(isrc: &str) -> bool {
    #[derive(Deserialize)]
    struct AskResp {
        boolean: bool,
    }

    // Escape for a double-quoted SPARQL string literal. The backslash has to be escaped along
    // with the quote, otherwise input such as `\"` would close the literal early and the rest
    // of the input would be parsed as query text. Raw line breaks are not allowed in the
    // literal either.
    let mut literal = String::with_capacity(isrc.len());
    for ch in isrc.chars() {
        match ch {
            '\\' => literal.push_str("\\\\"),
            '"' => literal.push_str("\\\""),
            '\n' => literal.push_str("\\n"),
            '\r' => literal.push_str("\\r"),
            '\t' => literal.push_str("\\t"),
            other => literal.push(other),
        }
    }
    let query = format!(r#"ASK {{ ?item wdt:P1243 "{literal}" }}"#);

    // Do not fall back to `Client::default()`: it panics on the same initialisation failures
    // that make `build()` return an error, and it would drop the User-Agent and timeout.
    let client = match reqwest::Client::builder()
        .user_agent(UA)
        .timeout(std::time::Duration::from_secs(5))
        .build()
    {
        Ok(client) => client,
        Err(_) => return false,
    };

    let response = client
        .get(SPARQL)
        .query(&[("query", query.as_str()), ("format", "json")])
        .send()
        .await
        // Treat 4xx/5xx answers as failures instead of trying to decode an error page.
        .and_then(|r| r.error_for_status());

    match response {
        Ok(r) => r
            .json::<AskResp>()
            .await
            .map(|answer| answer.boolean)
            .unwrap_or(false),
        Err(_) => false,
    }
}