rikhoffbauer2
/

lyric-sync

ml-intern

Model card Files Files and versions

xet

Community

rikhoffbauer2 committed 18 days ago

Commit

8041e59

verified ·

1 Parent(s): d5208bc

Upload lyric_sync/identify.py

Browse files

Files changed (1) hide show

lyric_sync/identify.py +292 -0

lyric_sync/identify.py ADDED Viewed

	@@ -0,0 +1,292 @@

+"""
+Song identification via audio fingerprinting and transcription fallback.
+Primary: Chromaprint/AcoustID fingerprint → MusicBrainz metadata
+Secondary: Vocal transcription → lyrics search (Genius/web)
+"""
+import json
+import subprocess
+import logging
+from dataclasses import dataclass
+from typing import Optional
+import requests
+logger = logging.getLogger(__name__)
+@dataclass
+class SongIdentification:
+    """Result of song identification."""
+    title: str
+    artist: str
+    album: Optional[str] = None
+    mbid: Optional[str] = None  # MusicBrainz Recording ID
+    score: float = 0.0
+    method: str = "unknown"  # "acoustid" | "transcription_search"
+class AcoustIDIdentifier:
+    """Identify songs via Chromaprint fingerprinting and AcoustID lookup."""
+    ACOUSTID_API_URL = "https://api.acoustid.org/v2/lookup"
+    def __init__(self, api_key: str, fpcalc_path: str = "fpcalc"):
+        """
+        Args:
+            api_key: AcoustID application API key (register at acoustid.org/login)
+            fpcalc_path: Path to fpcalc binary (from chromaprint-tools)
+        """
+        self.api_key = api_key
+        self.fpcalc_path = fpcalc_path
+    def fingerprint(self, audio_path: str, duration_limit: int = 120) -> dict:
+        """
+        Generate audio fingerprint using fpcalc.
+        Args:
+            audio_path: Path to audio file
+            duration_limit: Max seconds to analyze (120 is optimal for AcoustID)
+        Returns:
+            {'duration': int, 'fingerprint': str}
+        """
+        result = subprocess.run(
+            [self.fpcalc_path, "-json", "-length", str(duration_limit), audio_path],
+            capture_output=True, text=True, check=True, timeout=60
+        )
+        return json.loads(result.stdout)
+    def lookup(self, fingerprint: str, duration: int) -> Optional[SongIdentification]:
+        """
+        Look up a fingerprint via the AcoustID web API.
+        Args:
+            fingerprint: Base64 fingerprint string from fpcalc
+            duration: Audio duration in seconds
+        Returns:
+            SongIdentification or None if no match
+        """
+        resp = requests.post(self.ACOUSTID_API_URL, data={
+            "client": self.api_key,
+            "duration": duration,
+            "fingerprint": fingerprint,
+            "meta": "recordings releasegroups",
+            "format": "json",
+        }, timeout=15)
+        resp.raise_for_status()
+        data = resp.json()
+        if data.get("status") != "ok" or not data.get("results"):
+            return None
+        # Sort by score descending
+        results = sorted(data["results"], key=lambda r: r.get("score", 0), reverse=True)
+        best = results[0]
+        if best.get("score", 0) < 0.5:
+            return None
+        recordings = best.get("recordings", [])
+        if not recordings:
+            return None
+        rec = recordings[0]
+        artist = rec.get("artists", [{}])[0].get("name", "Unknown")
+        album = None
+        rgs = rec.get("releasegroups", [])
+        if rgs:
+            album = rgs[0].get("title")
+        return SongIdentification(
+            title=rec.get("title", "Unknown"),
+            artist=artist,
+            album=album,
+            mbid=rec.get("id"),
+            score=best["score"],
+            method="acoustid",
+        )
+    def identify(self, audio_path: str) -> Optional[SongIdentification]:
+        """
+        Full identification: fingerprint + lookup.
+        Args:
+            audio_path: Path to audio file
+        Returns:
+            SongIdentification or None
+        """
+        try:
+            fp_data = self.fingerprint(audio_path)
+        except (subprocess.CalledProcessError, FileNotFoundError) as e:
+            logger.warning(f"fpcalc failed: {e}")
+            return None
+        except json.JSONDecodeError:
+            logger.warning("fpcalc returned invalid JSON")
+            return None
+        return self.lookup(fp_data["fingerprint"], fp_data["duration"])
+class TranscriptionSearchIdentifier:
+    """
+    Fallback: identify song by transcribing vocals and searching lyrics databases.
+    Uses Genius API to search for lyric fragments.
+    """
+    GENIUS_SEARCH_URL = "https://api.genius.com/search"
+    def __init__(self, genius_token: Optional[str] = None):
+        """
+        Args:
+            genius_token: Genius API access token (optional, can also use web scraping)
+        """
+        self.genius_token = genius_token
+    def identify_from_transcript(self, transcript: str) -> Optional[SongIdentification]:
+        """
+        Search for a song using a transcript fragment.
+        Args:
+            transcript: Raw transcription text from vocals
+        Returns:
+            SongIdentification or None
+        """
+        # Use a ~5-15 word fragment from the middle (likely chorus area)
+        words = transcript.split()
+        if len(words) < 5:
+            return None
+        # Try multiple fragments: middle, first quarter, third quarter
+        fragments = self._extract_search_fragments(words)
+        for fragment in fragments:
+            result = self._search_genius(fragment)
+            if result:
+                return result
+            result = self._search_web(fragment)
+            if result:
+                return result
+        return None
+    def _extract_search_fragments(self, words: list[str], fragment_len: int = 8) -> list[str]:
+        """Extract distinctive fragments from transcript for searching."""
+        fragments = []
+        positions = [
+            len(words) // 2,       # middle (likely chorus)
+            len(words) // 4,       # first quarter
+            3 * len(words) // 4,   # third quarter
+        ]
+        for pos in positions:
+            start = max(0, pos - fragment_len // 2)
+            end = min(len(words), start + fragment_len)
+            fragment = " ".join(words[start:end])
+            if fragment:
+                fragments.append(fragment)
+        return fragments
+    def _search_genius(self, query: str) -> Optional[SongIdentification]:
+        """Search Genius API for lyric fragment."""
+        if not self.genius_token:
+            return None
+        try:
+            resp = requests.get(
+                self.GENIUS_SEARCH_URL,
+                params={"q": query},
+                headers={"Authorization": f"Bearer {self.genius_token}"},
+                timeout=10,
+            )
+            resp.raise_for_status()
+            hits = resp.json().get("response", {}).get("hits", [])
+            if not hits:
+                return None
+            result = hits[0]["result"]
+            return SongIdentification(
+                title=result["title"],
+                artist=result["primary_artist"]["name"],
+                score=0.6,  # lower confidence for text-based search
+                method="transcription_search",
+            )
+        except (requests.RequestException, KeyError, ValueError) as e:
+            logger.warning(f"Genius search failed: {e}")
+            return None
+    def _search_web(self, query: str) -> Optional[SongIdentification]:
+        """
+        Fallback web search for lyrics.
+        Uses a simple heuristic search via a lyrics-focused query.
+        Note: This is a placeholder for web search integration.
+        In production, you'd integrate with a search engine API.
+        """
+        # Search LRCLIB by text (it has a search endpoint)
+        try:
+            resp = requests.get(
+                "https://lrclib.net/api/search",
+                params={"q": query},
+                timeout=10,
+            )
+            if resp.status_code == 200:
+                results = resp.json()
+                if results:
+                    best = results[0]
+                    return SongIdentification(
+                        title=best.get("trackName", "Unknown"),
+                        artist=best.get("artistName", "Unknown"),
+                        album=best.get("albumName"),
+                        score=0.5,
+                        method="transcription_search",
+                    )
+        except (requests.RequestException, ValueError) as e:
+            logger.debug(f"LRCLIB search failed: {e}")
+        return None
+def identify_song(
+    audio_path: str,
+    acoustid_key: Optional[str] = None,
+    genius_token: Optional[str] = None,
+    transcript: Optional[str] = None,
+) -> Optional[SongIdentification]:
+    """
+    Identify a song using available methods.
+    Primary: AcoustID fingerprinting (requires acoustid_key + fpcalc installed)
+    Fallback: Transcript-based lyrics search (requires transcript text)
+    Args:
+        audio_path: Path to audio file
+        acoustid_key: AcoustID API key
+        genius_token: Genius API token (for fallback search)
+        transcript: Pre-computed transcript (for fallback; pipeline provides this)
+    Returns:
+        SongIdentification or None
+    """
+    # Primary: AcoustID
+    if acoustid_key:
+        identifier = AcoustIDIdentifier(acoustid_key)
+        result = identifier.identify(audio_path)
+        if result and result.score >= 0.7:
+            logger.info(f"AcoustID match: {result.artist} - {result.title} (score={result.score:.2f})")
+            return result
+        elif result:
+            logger.info(f"Low-confidence AcoustID match: {result.artist} - {result.title} (score={result.score:.2f})")
+    # Fallback: Transcription search
+    if transcript:
+        searcher = TranscriptionSearchIdentifier(genius_token)
+        result = searcher.identify_from_transcript(transcript)
+        if result:
+            logger.info(f"Transcript search match: {result.artist} - {result.title}")
+            return result
+    return None