# lyric-sync / lyric_sync/identify.py
# rikhoffbauer2's picture
# Upload lyric_sync/identify.py
# 8041e59 verified
"""
Song identification via audio fingerprinting and transcription fallback.
Primary: Chromaprint/AcoustID fingerprint → MusicBrainz metadata
Secondary: Vocal transcription → lyrics search (Genius/web)
"""
import json
import subprocess
import logging
from dataclasses import dataclass
from typing import Optional
import requests
logger = logging.getLogger(__name__)
@dataclass
class SongIdentification:
    """Outcome of a song identification attempt.

    Attributes:
        title: Track title.
        artist: Artist name.
        album: Album title, or None when not known.
        mbid: MusicBrainz Recording ID, or None when not known.
        score: Confidence attached to the match (0.0 by default).
        method: Strategy that produced the match: "acoustid" or
            "transcription_search" ("unknown" by default).
    """

    # Required fields
    title: str
    artist: str

    # Optional metadata
    album: Optional[str] = None
    mbid: Optional[str] = None

    # Provenance of the match
    score: float = 0.0
    method: str = "unknown"
class AcoustIDIdentifier:
    """Identify songs via Chromaprint fingerprinting and AcoustID lookup."""

    ACOUSTID_API_URL = "https://api.acoustid.org/v2/lookup"

    def __init__(self, api_key: str, fpcalc_path: str = "fpcalc"):
        """
        Args:
            api_key: AcoustID application API key (register at acoustid.org/login)
            fpcalc_path: Path to fpcalc binary (from chromaprint-tools)
        """
        self.api_key = api_key
        self.fpcalc_path = fpcalc_path

    def fingerprint(self, audio_path: str, duration_limit: int = 120) -> dict:
        """
        Generate audio fingerprint using fpcalc.

        Args:
            audio_path: Path to audio file
            duration_limit: Max seconds to analyze (120 is optimal for AcoustID)

        Returns:
            The JSON object printed by fpcalc: {'duration': ..., 'fingerprint': str}

        Raises:
            subprocess.CalledProcessError: fpcalc exited with a non-zero status.
            subprocess.TimeoutExpired: fpcalc did not finish within 60 seconds.
            OSError: the fpcalc binary could not be started (e.g. not found).
            json.JSONDecodeError: fpcalc's stdout was not valid JSON.
        """
        result = subprocess.run(
            [self.fpcalc_path, "-json", "-length", str(duration_limit), audio_path],
            capture_output=True, text=True, check=True, timeout=60,
        )
        return json.loads(result.stdout)

    def lookup(self, fingerprint: str, duration: int) -> Optional[SongIdentification]:
        """
        Look up a fingerprint via the AcoustID web API.

        Args:
            fingerprint: Base64 fingerprint string from fpcalc
            duration: Audio duration in seconds

        Returns:
            SongIdentification or None if no match (no results, best score
            below 0.5, or the best result has no recordings attached)

        Raises:
            requests.RequestException: network failure or error HTTP status.
            ValueError: the response body could not be decoded as JSON.
        """
        resp = requests.post(self.ACOUSTID_API_URL, data={
            "client": self.api_key,
            # fpcalc's JSON may report a fractional duration; send whole
            # seconds, as the signature promises.
            "duration": int(duration),
            "fingerprint": fingerprint,
            "meta": "recordings releasegroups",
            "format": "json",
        }, timeout=15)
        resp.raise_for_status()
        return self._parse_lookup_response(resp.json())

    @staticmethod
    def _first_artist_name(recording: dict) -> str:
        """Return the first listed artist's name, or "Unknown" if there is none."""
        # `or [{}]` also covers an empty/None "artists" value, not just a missing key.
        artists = recording.get("artists") or [{}]
        return artists[0].get("name", "Unknown")

    @staticmethod
    def _parse_lookup_response(data: dict) -> Optional[SongIdentification]:
        """Turn a decoded AcoustID lookup response into a SongIdentification (or None)."""
        results = data.get("results")
        if data.get("status") != "ok" or not results:
            return None

        # Highest score wins; max() keeps the earliest of equally scored results.
        best = max(results, key=lambda r: r.get("score", 0))
        score = best.get("score", 0)
        if score < 0.5:
            return None

        recordings = best.get("recordings") or []
        if not recordings:
            return None

        # A recording entry may carry no title; prefer the first one that has
        # one and only fall back to the very first entry otherwise.
        rec = next((r for r in recordings if r.get("title")), recordings[0])

        release_groups = rec.get("releasegroups") or []
        album = release_groups[0].get("title") if release_groups else None

        return SongIdentification(
            title=rec.get("title") or "Unknown",
            artist=AcoustIDIdentifier._first_artist_name(rec),
            album=album,
            mbid=rec.get("id"),
            score=score,
            method="acoustid",
        )

    def identify(self, audio_path: str) -> Optional[SongIdentification]:
        """
        Full identification: fingerprint + lookup.

        Failures of either step are logged and reported as None so that a
        caller can move on to another identification method.

        Args:
            audio_path: Path to audio file

        Returns:
            SongIdentification or None
        """
        try:
            fp_data = self.fingerprint(audio_path)
        except (subprocess.SubprocessError, OSError) as e:
            # SubprocessError covers both a non-zero exit and the 60 s timeout;
            # OSError covers a missing or non-executable fpcalc binary.
            logger.warning("fpcalc failed: %s", e)
            return None
        except json.JSONDecodeError:
            logger.warning("fpcalc returned invalid JSON")
            return None

        try:
            fingerprint = fp_data["fingerprint"]
            duration = fp_data["duration"]
        except (KeyError, TypeError):
            logger.warning("fpcalc output is missing fingerprint or duration")
            return None

        try:
            return self.lookup(fingerprint, duration)
        except (requests.RequestException, ValueError) as e:
            logger.warning("AcoustID lookup failed: %s", e)
            return None
class TranscriptionSearchIdentifier:
    """
    Fallback: identify song by transcribing vocals and searching lyrics databases.

    Each transcript fragment is first sent to the Genius search API (only when
    a token is configured) and then to the LRCLIB search endpoint.
    """

    GENIUS_SEARCH_URL = "https://api.genius.com/search"

    def __init__(self, genius_token: Optional[str] = None):
        """
        Args:
            genius_token: Genius API access token (optional; without it the
                Genius search is skipped and only LRCLIB is queried)
        """
        self.genius_token = genius_token

    def identify_from_transcript(self, transcript: str) -> Optional[SongIdentification]:
        """
        Search for a song using a transcript fragment.

        Args:
            transcript: Raw transcription text from vocals

        Returns:
            SongIdentification or None (also None when the transcript has
            fewer than 5 words)
        """
        words = transcript.split()
        if len(words) < 5:
            return None

        # Try multiple fragments: middle, first quarter, third quarter.
        # For each fragment Genius is asked first, LRCLIB second.
        for fragment in self._extract_search_fragments(words):
            result = self._search_genius(fragment) or self._search_web(fragment)
            if result:
                return result
        return None

    def _extract_search_fragments(self, words: list[str], fragment_len: int = 8) -> list[str]:
        """
        Extract distinctive fragments from transcript for searching.

        Windows of up to `fragment_len` words are taken around the middle, the
        first quarter and the third quarter of the transcript, in that order.
        Duplicate windows (which happen for short transcripts) are dropped so
        the same query is not sent to the search services more than once.
        """
        total = len(words)
        centers = (
            total // 2,      # middle (likely chorus)
            total // 4,      # first quarter
            3 * total // 4,  # third quarter
        )
        fragments: list[str] = []
        for center in centers:
            start = max(0, center - fragment_len // 2)
            end = min(total, start + fragment_len)
            fragment = " ".join(words[start:end])
            if fragment and fragment not in fragments:
                fragments.append(fragment)
        return fragments

    def _search_genius(self, query: str) -> Optional[SongIdentification]:
        """Search Genius API for lyric fragment; returns None without a token or on failure."""
        if not self.genius_token:
            return None
        try:
            resp = requests.get(
                self.GENIUS_SEARCH_URL,
                params={"q": query},
                headers={"Authorization": f"Bearer {self.genius_token}"},
                timeout=10,
            )
            resp.raise_for_status()
            hits = resp.json().get("response", {}).get("hits", [])
            if not hits:
                return None
            top = hits[0]["result"]
            return SongIdentification(
                title=top["title"],
                artist=top["primary_artist"]["name"],
                score=0.6,  # lower confidence for text-based search
                method="transcription_search",
            )
        except (requests.RequestException, LookupError, TypeError, ValueError, AttributeError) as e:
            # LookupError/TypeError/AttributeError: the payload did not have
            # the expected nested dict/list structure (missing or null fields).
            logger.warning("Genius search failed: %s", e)
            return None

    def _search_web(self, query: str) -> Optional[SongIdentification]:
        """
        Fallback web search for lyrics.

        Queries the LRCLIB search endpoint and returns its first result.
        Note: This is a placeholder for web search integration.
        In production, you'd integrate with a search engine API.
        """
        # NOTE(review): LRCLIB's `q` parameter appears to match track/artist/album
        # fields rather than lyric text -- confirm that lyric fragments can hit.
        try:
            resp = requests.get(
                "https://lrclib.net/api/search",
                params={"q": query},
                timeout=10,
            )
            if resp.status_code != 200:
                return None
            results = resp.json()
        except (requests.RequestException, ValueError) as e:
            logger.debug("LRCLIB search failed: %s", e)
            return None

        # Only a non-empty list of objects is a usable answer; anything else
        # (e.g. an error object) is treated as "no match" instead of raising.
        if not isinstance(results, list) or not results:
            return None
        best = results[0]
        if not isinstance(best, dict):
            return None

        return SongIdentification(
            title=best.get("trackName", "Unknown"),
            artist=best.get("artistName", "Unknown"),
            album=best.get("albumName"),
            score=0.5,
            method="transcription_search",
        )
def identify_song(
    audio_path: str,
    acoustid_key: Optional[str] = None,
    genius_token: Optional[str] = None,
    transcript: Optional[str] = None,
) -> Optional[SongIdentification]:
    """
    Identify a song using available methods.

    Primary: AcoustID fingerprinting (requires acoustid_key + fpcalc installed)
    Fallback: Transcript-based lyrics search (requires transcript text)

    An AcoustID match is returned only when its score is at least 0.7; a
    lower-scoring match is logged and the fallback is tried instead. A failure
    of the AcoustID step is logged and does not prevent the fallback.

    Args:
        audio_path: Path to audio file
        acoustid_key: AcoustID API key
        genius_token: Genius API token (for fallback search)
        transcript: Pre-computed transcript (for fallback; pipeline provides this)

    Returns:
        SongIdentification or None
    """
    # Primary: AcoustID
    if acoustid_key:
        try:
            result = AcoustIDIdentifier(acoustid_key).identify(audio_path)
        except (
            requests.RequestException,
            subprocess.SubprocessError,
            OSError,
            ValueError,
            LookupError,
        ) as e:
            # The primary method is best-effort: a network error or fpcalc
            # timeout must not stop the transcript fallback from running.
            logger.warning("AcoustID identification failed: %s", e)
            result = None

        if result and result.score >= 0.7:
            logger.info(
                "AcoustID match: %s - %s (score=%.2f)",
                result.artist, result.title, result.score,
            )
            return result
        if result:
            # Below the 0.7 threshold: reported in the log but not returned.
            logger.info(
                "Low-confidence AcoustID match: %s - %s (score=%.2f)",
                result.artist, result.title, result.score,
            )

    # Fallback: Transcription search
    if transcript:
        searcher = TranscriptionSearchIdentifier(genius_token)
        result = searcher.identify_from_transcript(transcript)
        if result:
            logger.info("Transcript search match: %s - %s", result.artist, result.title)
            return result

    return None