rikhoffbauer2 committed on
Commit
8041e59
·
verified ·
1 Parent(s): d5208bc

Upload lyric_sync/identify.py

Browse files
Files changed (1) hide show
  1. lyric_sync/identify.py +292 -0
lyric_sync/identify.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Song identification via audio fingerprinting and transcription fallback.
3
+
4
+ Primary: Chromaprint/AcoustID fingerprint → MusicBrainz metadata
5
+ Secondary: Vocal transcription → lyrics search (Genius/web)
6
+ """
7
+
8
+ import json
9
+ import subprocess
10
+ import logging
11
+ from dataclasses import dataclass
12
+ from typing import Optional
13
+
14
+ import requests
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@dataclass
class SongIdentification:
    """Outcome of a song-identification attempt.

    Positional field order is: title, artist, album, mbid, score, method.
    Only ``title`` and ``artist`` are required.
    """

    # Track title.
    title: str
    # Name of the credited artist.
    artist: str
    # Album title, when one is available.
    album: Optional[str] = None
    # MusicBrainz Recording ID, when one is available.
    mbid: Optional[str] = None
    # Match confidence reported by (or assigned to) the identification method.
    score: float = 0.0
    # Identification method tag: "acoustid" | "transcription_search"
    method: str = "unknown"
28
+
29
+
30
class AcoustIDIdentifier:
    """Identify songs via Chromaprint fingerprinting and AcoustID lookup."""

    ACOUSTID_API_URL = "https://api.acoustid.org/v2/lookup"

    # Results scoring below this are treated as "no match" by lookup().
    MIN_SCORE = 0.5

    def __init__(self, api_key: str, fpcalc_path: str = "fpcalc"):
        """
        Args:
            api_key: AcoustID application API key (register at acoustid.org/login)
            fpcalc_path: Path to fpcalc binary (from chromaprint-tools)
        """
        self.api_key = api_key
        self.fpcalc_path = fpcalc_path

    def fingerprint(self, audio_path: str, duration_limit: int = 120) -> dict:
        """
        Generate an audio fingerprint by running ``fpcalc -json``.

        Args:
            audio_path: Path to audio file
            duration_limit: Max seconds of audio to analyze (passed as ``-length``)

        Returns:
            The JSON object printed by fpcalc; callers in this class read its
            ``fingerprint`` and ``duration`` keys.

        Raises:
            subprocess.CalledProcessError: fpcalc exited with a non-zero status.
            subprocess.TimeoutExpired: fpcalc ran longer than 60 seconds.
            OSError: the fpcalc binary could not be started (e.g. not found).
            json.JSONDecodeError: fpcalc's stdout was not valid JSON.
        """
        result = subprocess.run(
            [self.fpcalc_path, "-json", "-length", str(duration_limit), audio_path],
            capture_output=True,
            text=True,
            check=True,
            timeout=60,
        )
        return json.loads(result.stdout)

    def lookup(self, fingerprint: str, duration: int) -> Optional[SongIdentification]:
        """
        Look up a fingerprint via the AcoustID web API.

        Args:
            fingerprint: Fingerprint string from fpcalc
            duration: Audio duration in seconds. Coerced to a whole number
                before sending, since fpcalc's JSON may carry a fractional
                value. NOTE(review): confirm the API rejects fractions.

        Returns:
            SongIdentification for the best-scoring result, or None when the
            API reports no usable match (non-"ok" status, no results, best
            score below ``MIN_SCORE``, or no recordings attached).

        Raises:
            requests.RequestException: on network failure or HTTP error status.
            ValueError: if the response body is not valid JSON or ``duration``
                cannot be converted to an integer.
        """
        resp = requests.post(
            self.ACOUSTID_API_URL,
            data={
                "client": self.api_key,
                "duration": int(duration),
                "fingerprint": fingerprint,
                "meta": "recordings releasegroups",
                "format": "json",
            },
            timeout=15,
        )
        resp.raise_for_status()
        data = resp.json()

        if data.get("status") != "ok" or not data.get("results"):
            return None

        # Highest score wins; on ties max() keeps the earliest entry, which is
        # what a stable descending sort followed by [0] would select.
        best = max(data["results"], key=lambda r: r.get("score", 0))
        score = best.get("score", 0)
        if score < self.MIN_SCORE:
            return None

        recordings = best.get("recordings") or []
        if not recordings:
            return None

        # Prefer the first recording that actually carries a title; fall back
        # to the first entry so a match is still reported.
        rec = next((r for r in recordings if r.get("title")), recordings[0])

        # `or [{}]` also covers an explicitly empty/None "artists" value,
        # which a plain .get(..., default) would index into and crash on.
        artists = rec.get("artists") or [{}]
        artist = artists[0].get("name", "Unknown")

        release_groups = rec.get("releasegroups") or []
        album = release_groups[0].get("title") if release_groups else None

        return SongIdentification(
            title=rec.get("title", "Unknown"),
            artist=artist,
            album=album,
            mbid=rec.get("id"),
            score=score,
            method="acoustid",
        )

    def identify(self, audio_path: str) -> Optional[SongIdentification]:
        """
        Full identification: fingerprint + lookup.

        Failures of either step (fpcalc missing, failing or timing out;
        malformed fpcalc output; network/HTTP/JSON errors from AcoustID) are
        logged as warnings and reported as None rather than raised.

        Args:
            audio_path: Path to audio file

        Returns:
            SongIdentification or None
        """
        try:
            fp_data = self.fingerprint(audio_path)
        except (subprocess.SubprocessError, OSError) as e:
            # SubprocessError covers both CalledProcessError and the
            # TimeoutExpired raised by the timeout in fingerprint().
            logger.warning("fpcalc failed: %s", e)
            return None
        except json.JSONDecodeError:
            logger.warning("fpcalc returned invalid JSON")
            return None

        try:
            fingerprint = fp_data["fingerprint"]
            duration = fp_data["duration"]
        except (KeyError, TypeError):
            logger.warning("fpcalc output is missing fingerprint or duration")
            return None

        try:
            return self.lookup(fingerprint, duration)
        except (requests.RequestException, ValueError, TypeError) as e:
            logger.warning("AcoustID lookup failed: %s", e)
            return None
132
+
133
+
134
class TranscriptionSearchIdentifier:
    """
    Fallback: identify a song by searching lyrics services for transcript fragments.

    Each fragment is tried against the Genius search API (only when a token is
    configured) and then against LRCLIB's search endpoint.
    """

    GENIUS_SEARCH_URL = "https://api.genius.com/search"

    def __init__(self, genius_token: Optional[str] = None):
        """
        Args:
            genius_token: Genius API access token. When omitted, the Genius
                search is skipped and only LRCLIB is queried.
        """
        self.genius_token = genius_token

    def identify_from_transcript(self, transcript: str) -> "Optional[SongIdentification]":
        """
        Search for a song using fragments of a transcript.

        Args:
            transcript: Raw transcription text from vocals

        Returns:
            The first match found for any fragment, or None when the
            transcript is empty, has fewer than 5 words, or nothing matched.
        """
        if not transcript:
            return None

        words = transcript.split()
        if len(words) < 5:
            return None

        # Fragments are tried in order: middle, first quarter, third quarter.
        for fragment in self._extract_search_fragments(words):
            result = self._search_genius(fragment) or self._search_web(fragment)
            if result:
                return result

        return None

    def _extract_search_fragments(self, words: list[str], fragment_len: int = 8) -> list[str]:
        """
        Extract up to three distinct fragments from the transcript for searching.

        Windows of ``fragment_len`` words are centred on the middle, the first
        quarter and the third quarter of ``words`` and clipped to the list
        bounds. Identical fragments are returned only once, so a short
        transcript does not trigger repeated identical searches.
        """
        total = len(words)
        half = fragment_len // 2
        fragments: list[str] = []
        for pos in (total // 2, total // 4, 3 * total // 4):
            start = max(0, pos - half)
            end = min(total, start + fragment_len)
            fragment = " ".join(words[start:end])
            if fragment and fragment not in fragments:
                fragments.append(fragment)
        return fragments

    def _search_genius(self, query: str) -> "Optional[SongIdentification]":
        """
        Search the Genius API for a lyric fragment.

        Returns None when no token is configured, when there are no hits, or
        when the request fails or the payload has an unexpected shape (the
        failure is logged as a warning). The first hit is reported with a
        fixed score of 0.6.
        """
        if not self.genius_token:
            return None

        try:
            resp = requests.get(
                self.GENIUS_SEARCH_URL,
                params={"q": query},
                headers={"Authorization": f"Bearer {self.genius_token}"},
                timeout=10,
            )
            resp.raise_for_status()
            hits = resp.json().get("response", {}).get("hits", [])
            if not hits:
                return None

            top = hits[0]["result"]
            return SongIdentification(
                title=top["title"],
                artist=top["primary_artist"]["name"],
                score=0.6,  # lower confidence for text-based search
                method="transcription_search",
            )
        except (requests.RequestException, KeyError, TypeError, AttributeError, ValueError) as e:
            logger.warning("Genius search failed: %s", e)
            return None

    def _search_web(self, query: str) -> "Optional[SongIdentification]":
        """
        Fallback search against LRCLIB's ``/api/search`` endpoint.

        The first entry of the response is reported with a fixed score of 0.5.
        Returns None on a non-200 status, an empty or unexpectedly shaped
        response, or a request/JSON error (logged at debug level).

        NOTE(review): LRCLIB documents ``q`` as matching title/artist/album
        fields; confirm it is effective for lyric fragments, or swap in a
        lyrics-aware search backend.
        """
        try:
            resp = requests.get(
                "https://lrclib.net/api/search",
                params={"q": query},
                timeout=10,
            )
            if resp.status_code != 200:
                return None
            results = resp.json()
        except (requests.RequestException, ValueError) as e:
            logger.debug("LRCLIB search failed: %s", e)
            return None

        # Guard against non-list payloads (e.g. an error object) so a
        # malformed response is treated as "no match" instead of raising.
        if not isinstance(results, list) or not results:
            return None
        best = results[0]
        if not isinstance(best, dict):
            return None

        return SongIdentification(
            title=best.get("trackName", "Unknown"),
            artist=best.get("artistName", "Unknown"),
            album=best.get("albumName"),
            score=0.5,
            method="transcription_search",
        )
251
+
252
+
253
def identify_song(
    audio_path: str,
    acoustid_key: Optional[str] = None,
    genius_token: Optional[str] = None,
    transcript: Optional[str] = None,
) -> Optional[SongIdentification]:
    """
    Identify a song using available methods.

    Primary: AcoustID fingerprinting (requires acoustid_key + fpcalc installed).
    An AcoustID result is returned only when its score is at least 0.7; a
    lower-scoring result is logged and then discarded.
    Fallback: Transcript-based lyrics search (requires transcript text).

    A failure in the AcoustID step (network error, fpcalc timeout, malformed
    response) is logged and does not prevent the fallback from running.

    Args:
        audio_path: Path to audio file
        acoustid_key: AcoustID API key
        genius_token: Genius API token (for fallback search)
        transcript: Pre-computed transcript (for fallback; pipeline provides this)

    Returns:
        SongIdentification or None
    """
    # Primary: AcoustID
    if acoustid_key:
        identifier = AcoustIDIdentifier(acoustid_key)
        try:
            match = identifier.identify(audio_path)
        except (
            requests.RequestException,
            subprocess.SubprocessError,
            OSError,
            ValueError,
            LookupError,
        ) as e:
            # Keep going: the transcript search below exists precisely for
            # the case where fingerprinting cannot produce an answer.
            logger.warning("AcoustID identification failed: %s", e)
            match = None

        if match and match.score >= 0.7:
            logger.info(
                "AcoustID match: %s - %s (score=%.2f)",
                match.artist, match.title, match.score,
            )
            return match
        if match:
            logger.info(
                "Low-confidence AcoustID match: %s - %s (score=%.2f)",
                match.artist, match.title, match.score,
            )

    # Fallback: Transcription search
    if transcript:
        searcher = TranscriptionSearchIdentifier(genius_token)
        match = searcher.identify_from_transcript(transcript)
        if match:
            logger.info("Transcript search match: %s - %s", match.artist, match.title)
            return match

    return None