File size: 9,790 Bytes

8041e59

"""
Song identification via audio fingerprinting and transcription fallback.

Primary: Chromaprint/AcoustID fingerprint → MusicBrainz metadata
Secondary: Vocal transcription → lyrics search (Genius API, then LRCLIB search)
"""

import json
import subprocess
import logging
from dataclasses import dataclass
from typing import Optional

import requests

# Module-level logger named after this module; no handlers or levels are
# configured here.
logger = logging.getLogger(__name__)


@dataclass
class SongIdentification:
    """
    Outcome of one identification attempt.

    Only ``title`` and ``artist`` are required; every other field has a
    default so that identifiers can fill in just what they know.
    """

    # Track title as reported by the matching service.
    title: str
    # Name of the performing artist.
    artist: str
    # Album / release-group title, when the service supplied one.
    album: Optional[str] = None
    # MusicBrainz Recording ID, when available.
    mbid: Optional[str] = None
    # Confidence attached to the match by the identifier that produced it.
    score: float = 0.0
    # Which identifier produced the match: "acoustid" | "transcription_search".
    method: str = "unknown"


class AcoustIDIdentifier:
    """
    Identify songs via Chromaprint fingerprinting and AcoustID lookup.

    ``fingerprint`` shells out to the ``fpcalc`` binary, ``lookup`` posts the
    fingerprint to the AcoustID web service, and ``identify`` chains the two
    while converting every expected failure into a ``None`` result.
    """

    ACOUSTID_API_URL = "https://api.acoustid.org/v2/lookup"

    # AcoustID results scoring below this are treated as "no match".
    MIN_RESULT_SCORE = 0.5

    def __init__(self, api_key: str, fpcalc_path: str = "fpcalc"):
        """
        Args:
            api_key: AcoustID application API key (register at acoustid.org/login)
            fpcalc_path: Path to fpcalc binary (from chromaprint-tools)
        """
        self.api_key = api_key
        self.fpcalc_path = fpcalc_path

    def fingerprint(self, audio_path: str, duration_limit: int = 120) -> dict:
        """
        Generate audio fingerprint using fpcalc.

        Args:
            audio_path: Path to audio file
            duration_limit: Max seconds of audio to analyze (passed to
                fpcalc's ``-length`` option)

        Returns:
            The JSON object printed by fpcalc, expected to contain the keys
            'duration' and 'fingerprint'.

        Raises:
            OSError: fpcalc could not be started (e.g. FileNotFoundError).
            subprocess.CalledProcessError: fpcalc exited with a non-zero status.
            subprocess.TimeoutExpired: fpcalc ran longer than 60 seconds.
            json.JSONDecodeError: fpcalc's output was not valid JSON.
        """
        result = subprocess.run(
            [self.fpcalc_path, "-json", "-length", str(duration_limit), audio_path],
            capture_output=True, text=True, check=True, timeout=60
        )
        return json.loads(result.stdout)

    def lookup(self, fingerprint: str, duration: int) -> Optional[SongIdentification]:
        """
        Look up a fingerprint via the AcoustID web API.

        Args:
            fingerprint: Fingerprint string from fpcalc
            duration: Audio duration in seconds (fractional values are
                truncated to whole seconds before being sent)

        Returns:
            SongIdentification for the best-scoring result that has linked
            recording metadata, or None if no result qualifies.

        Raises:
            requests.RequestException: on network failure or HTTP error status.
            ValueError: if the response body is not valid JSON.
        """
        resp = requests.post(self.ACOUSTID_API_URL, data={
            "client": self.api_key,
            # fpcalc may report fractional seconds; send whole seconds.
            "duration": int(duration),
            "fingerprint": fingerprint,
            "meta": "recordings releasegroups",
            "format": "json",
        }, timeout=15)
        resp.raise_for_status()

        selected = self._select_recording(resp.json(), self.MIN_RESULT_SCORE)
        if selected is None:
            return None

        rec, score = selected
        release_groups = rec.get("releasegroups") or []
        return SongIdentification(
            title=rec.get("title", "Unknown"),
            artist=self._primary_artist(rec),
            album=release_groups[0].get("title") if release_groups else None,
            mbid=rec.get("id"),
            score=score,
            method="acoustid",
        )

    @staticmethod
    def _select_recording(data, min_score: float = 0.5) -> Optional[tuple[dict, float]]:
        """Pick ``(recording, score)`` from an AcoustID response, or None.

        Results are visited from highest to lowest score. A result without
        linked recordings is skipped rather than ending the search, so a
        lower-scoring result that still clears ``min_score`` can be used.
        """
        if not isinstance(data, dict) or data.get("status") != "ok":
            return None

        ranked = sorted(
            data.get("results") or [],
            key=lambda r: r.get("score", 0),
            reverse=True,
        )
        for result in ranked:
            score = result.get("score", 0)
            if score < min_score:
                # Sorted descending: nothing further can qualify.
                break
            recordings = result.get("recordings") or []
            if not recordings:
                continue
            # Prefer a recording that actually carries a title.
            titled = next((r for r in recordings if r.get("title")), None)
            return (titled or recordings[0]), score
        return None

    @staticmethod
    def _primary_artist(recording: dict) -> str:
        """Return the first credited artist's name, or "Unknown" if absent."""
        artists = recording.get("artists") or []
        first = artists[0] if artists else {}
        return first.get("name") or "Unknown"

    def identify(self, audio_path: str) -> Optional[SongIdentification]:
        """
        Full identification: fingerprint + lookup.

        Expected failures (fpcalc missing, failing or timing out; malformed
        fpcalc output; network/HTTP/JSON errors from the lookup) are logged
        as warnings and reported as None so callers can fall back to another
        identification method.

        Args:
            audio_path: Path to audio file

        Returns:
            SongIdentification or None
        """
        try:
            fp_data = self.fingerprint(audio_path)
        except (subprocess.SubprocessError, OSError) as e:
            # SubprocessError covers both CalledProcessError and TimeoutExpired;
            # OSError covers a missing or non-executable binary.
            logger.warning("fpcalc failed: %s", e)
            return None
        except json.JSONDecodeError:
            logger.warning("fpcalc returned invalid JSON")
            return None

        try:
            fp = fp_data["fingerprint"]
            duration = fp_data["duration"]
        except (KeyError, TypeError):
            logger.warning("fpcalc output is missing fingerprint or duration")
            return None

        try:
            return self.lookup(fp, duration)
        except (requests.RequestException, ValueError) as e:
            logger.warning("AcoustID lookup failed: %s", e)
            return None


class TranscriptionSearchIdentifier:
    """
    Fallback: identify song by transcribing vocals and searching lyrics databases.

    Each search fragment is tried against the Genius search API first (only
    when a token is configured) and then against the LRCLIB search endpoint.
    """

    GENIUS_SEARCH_URL = "https://api.genius.com/search"
    LRCLIB_SEARCH_URL = "https://lrclib.net/api/search"

    def __init__(self, genius_token: Optional[str] = None):
        """
        Args:
            genius_token: Genius API access token (optional; without it the
                Genius search is skipped and only LRCLIB is queried)
        """
        self.genius_token = genius_token

    def identify_from_transcript(self, transcript: str) -> Optional[SongIdentification]:
        """
        Search for a song using a transcript fragment.

        Args:
            transcript: Raw transcription text from vocals

        Returns:
            SongIdentification or None (also None when the transcript has
            fewer than five words, which is too little to search on)
        """
        words = transcript.split() if transcript else []
        if len(words) < 5:
            return None

        # Fragments come from the middle, first quarter and third quarter of
        # the transcript; the first search that yields a hit wins.
        for fragment in self._extract_search_fragments(words):
            result = self._search_genius(fragment)
            if result:
                return result
            result = self._search_web(fragment)
            if result:
                return result

        return None

    def _extract_search_fragments(self, words: list[str], fragment_len: int = 8) -> list[str]:
        """
        Extract fragments from the transcript for searching.

        A window of up to ``fragment_len`` words is taken around the middle,
        the first-quarter point and the third-quarter point, in that order.
        Identical windows (which happen for short transcripts) are returned
        only once so the same query is not sent repeatedly.

        Args:
            words: Transcript split into words
            fragment_len: Maximum number of words per fragment

        Returns:
            Unique, non-empty fragments in search-priority order
        """
        total = len(words)
        if total == 0 or fragment_len <= 0:
            return []

        anchors = (
            total // 2,       # middle (likely chorus)
            total // 4,       # first quarter
            3 * total // 4,   # third quarter
        )
        fragments: list[str] = []
        for anchor in anchors:
            start = max(0, anchor - fragment_len // 2)
            end = min(total, start + fragment_len)
            fragment = " ".join(words[start:end])
            if fragment and fragment not in fragments:
                fragments.append(fragment)
        return fragments

    def _search_genius(self, query: str) -> Optional[SongIdentification]:
        """
        Search Genius API for lyric fragment.

        Returns the first hit as a SongIdentification with a fixed score of
        0.6, or None when no token is configured, there are no hits, or the
        request/response handling fails (failures are logged as warnings).
        """
        if not self.genius_token:
            return None

        try:
            resp = requests.get(
                self.GENIUS_SEARCH_URL,
                params={"q": query},
                headers={"Authorization": f"Bearer {self.genius_token}"},
                timeout=10,
            )
            resp.raise_for_status()
            hits = resp.json().get("response", {}).get("hits", [])
            if not hits:
                return None

            top = hits[0]["result"]
            return SongIdentification(
                title=top["title"],
                artist=top["primary_artist"]["name"],
                score=0.6,  # lower confidence for text-based search
                method="transcription_search",
            )
        except (
            requests.RequestException,
            # Remaining types cover a payload whose shape is not what we expect.
            KeyError, IndexError, TypeError, AttributeError, ValueError,
        ) as e:
            logger.warning("Genius search failed: %s", e)
            return None

    def _search_web(self, query: str) -> Optional[SongIdentification]:
        """
        Fallback search against the LRCLIB search endpoint.

        Returns the first search result as a SongIdentification with a fixed
        score of 0.5, or None on a non-200 status, an empty or malformed
        payload, or a request failure (failures are logged at debug level).

        NOTE(review): LRCLIB's ``q`` parameter appears to match track title,
        artist and album fields rather than lyric text - confirm that this
        endpoint is effective for lyric fragments before relying on it.
        """
        try:
            resp = requests.get(
                self.LRCLIB_SEARCH_URL,
                params={"q": query},
                timeout=10,
            )
            if resp.status_code != 200:
                return None
            results = resp.json()
        except (requests.RequestException, ValueError) as e:
            logger.debug("LRCLIB search failed: %s", e)
            return None

        # Guard against error objects or other unexpected payload shapes.
        if not isinstance(results, list) or not results:
            return None
        best = results[0]
        if not isinstance(best, dict):
            return None

        return SongIdentification(
            title=best.get("trackName", "Unknown"),
            artist=best.get("artistName", "Unknown"),
            album=best.get("albumName"),
            score=0.5,
            method="transcription_search",
        )


def identify_song(
    audio_path: str,
    acoustid_key: Optional[str] = None,
    genius_token: Optional[str] = None,
    transcript: Optional[str] = None,
    min_acoustid_score: float = 0.7,
) -> Optional[SongIdentification]:
    """
    Identify a song using available methods.

    Primary: AcoustID fingerprinting (requires acoustid_key + fpcalc installed)
    Fallback: Transcript-based lyrics search (requires transcript text)

    A failure of the primary method (fpcalc error, network error, malformed
    response) is logged and treated as "no match" so the fallback still runs.

    Args:
        audio_path: Path to audio file
        acoustid_key: AcoustID API key; the primary method is skipped if empty
        genius_token: Genius API token (for fallback search)
        transcript: Pre-computed transcript (for fallback; pipeline provides this)
        min_acoustid_score: Minimum AcoustID score accepted as a match;
            lower-scoring matches are logged and the fallback is tried instead

    Returns:
        SongIdentification or None
    """
    # Primary: AcoustID
    if acoustid_key:
        try:
            match = AcoustIDIdentifier(acoustid_key).identify(audio_path)
        except (OSError, subprocess.SubprocessError, LookupError, ValueError) as exc:
            # requests' exceptions derive from OSError, so network and HTTP
            # failures land here too; none of them should block the fallback.
            logger.warning("AcoustID identification failed: %s", exc)
            match = None

        if match and match.score >= min_acoustid_score:
            logger.info(
                "AcoustID match: %s - %s (score=%.2f)",
                match.artist, match.title, match.score,
            )
            return match
        if match:
            logger.info(
                "Low-confidence AcoustID match: %s - %s (score=%.2f)",
                match.artist, match.title, match.score,
            )

    # Fallback: Transcription search
    if transcript:
        searcher = TranscriptionSearchIdentifier(genius_token)
        match = searcher.identify_from_transcript(transcript)
        if match:
            logger.info("Transcript search match: %s - %s", match.artist, match.title)
            return match

    return None