| """ |
| Lyrics acquisition from online databases. |
| |
| Fetches reference (correct) lyrics given artist + title. |
| Supports synced LRC format and plain text. |
| |
| Priority sources: |
| 1. LRCLIB (free, no auth, synced LRC available) |
| 2. syncedlyrics library (multi-source aggregator) |
| 3. Genius (plain text fallback, requires API key) |
| """ |
|
|
| import logging |
| import re |
| from dataclasses import dataclass, field |
| from typing import Optional |
|
|
| import requests |
|
|
| logger = logging.getLogger(__name__) |
|
|
|
|
@dataclass
class LyricLine:
    """One line of a song's lyrics, optionally tagged with a start time."""

    # The lyric text of this line.
    text: str
    # Start time of the line; None when the line carries no timing.
    timestamp: Optional[float] = None

    @property
    def words(self) -> list[str]:
        """Return the whitespace-separated tokens of ``text``."""
        tokens = self.text.split()
        return tokens
|
|
|
|
@dataclass
class Lyrics:
    """The full lyrics of one song together with where they came from."""

    # Entire lyric text as a single string.
    plain_text: str
    # Per-line breakdown of the lyrics (empty list by default).
    lines: list[LyricLine] = field(default_factory=list)
    # Whether the lyrics were obtained in a time-synced form.
    synced: bool = False
    # Label of the provider that supplied the lyrics.
    source: str = "unknown"

    @property
    def words(self) -> list[str]:
        """Return every whitespace-separated token of ``plain_text``, in order."""
        tokens = self.plain_text.split()
        return tokens

    @property
    def word_count(self) -> int:
        """Return how many tokens ``words`` yields."""
        all_words = self.words
        return len(all_words)
|
|
|
|
def parse_lrc(lrc_text: str) -> list[LyricLine]:
    """
    Parse LRC-formatted text into LyricLine objects.

    Supported input:
        Simple LRC:    [MM:SS.cs] Lyrics text here
        Repeated tags: [MM:SS.cs][MM:SS.cs] Line sung at several times
        Enhanced LRC:  [MM:SS.cs] <MM:SS.cs> word <MM:SS.cs> word ...

    Minutes may have 1-3 digits and the fractional part (1-3 digits) is
    optional, so ``[1:05]`` and ``[01:05.250]`` are both accepted.

    Args:
        lrc_text: Raw LRC document.

    Returns:
        One LyricLine per (time tag, text) pair, with ``timestamp`` in
        seconds. Timed lines without text and bracketed lines that are not
        time tags (e.g. ``[ar:...]`` metadata) are skipped. Lines without
        any bracket prefix are kept as untimed LyricLine objects. When a
        line carries several time tags, the timed entries of the result are
        reordered chronologically; untimed entries keep their positions.
    """
    # A leading time tag, plus any whitespace that follows it.
    time_tag = re.compile(r"\[(\d{1,3}):(\d{2})(?:\.(\d{1,3}))?\]\s*")
    # Inline per-word time tags used by enhanced LRC.
    word_tag = re.compile(r"<\d{1,3}:\d{2}(?:\.\d{1,3})?>")

    lines: list[LyricLine] = []
    has_repeated_tags = False

    for raw_line in lrc_text.strip().split("\n"):
        raw_line = raw_line.strip()
        if not raw_line:
            continue

        # Consume every time tag at the start of the line, not just the
        # first one, so "[00:12.00][00:45.00]Chorus" does not leak a tag
        # into the lyric text.
        timestamps: list[float] = []
        cursor = 0
        match = time_tag.match(raw_line, cursor)
        while match:
            minutes, seconds, fraction = match.groups()
            stamp = float(int(minutes) * 60 + int(seconds))
            if fraction:
                # "45" -> 0.45, "450" -> 0.450, "5" -> 0.5
                stamp += int(fraction) / 10 ** len(fraction)
            timestamps.append(stamp)
            cursor = match.end()
            match = time_tag.match(raw_line, cursor)

        if not timestamps:
            # Untimed content: keep plain text, drop bracketed metadata.
            if not raw_line.startswith("["):
                lines.append(LyricLine(text=raw_line))
            continue

        text, removed = word_tag.subn("", raw_line[cursor:])
        # Removing word tags leaves doubled spaces behind; collapse them.
        text = " ".join(text.split()) if removed else text.strip()
        if not text:
            continue

        if len(timestamps) > 1:
            has_repeated_tags = True
        for stamp in timestamps:
            lines.append(LyricLine(text=text, timestamp=stamp))

    if has_repeated_tags:
        # Repeated tags put later occurrences out of order; restore
        # chronological order among timed lines only (stable sort).
        timed = iter(
            sorted(
                (ln for ln in lines if ln.timestamp is not None),
                key=lambda ln: ln.timestamp,
            )
        )
        lines = [next(timed) if ln.timestamp is not None else ln for ln in lines]

    return lines
|
|
|
|
class LRCLIBFetcher:
    """
    Fetch lyrics from LRCLIB.net — free, no auth, community-maintained.
    Returns both synced LRC and plain text when available.
    """

    BASE_URL = "https://lrclib.net/api"

    def fetch(
        self,
        artist: str,
        title: str,
        album: Optional[str] = None,
        duration: Optional[float] = None,
    ) -> Optional[Lyrics]:
        """
        Fetch lyrics by metadata match.

        Args:
            artist: Artist name
            title: Track title
            album: Album name (optional, improves match accuracy)
            duration: Track duration in seconds (optional)

        Returns:
            Lyrics on success; None on HTTP 404, on any request/JSON
            failure, or when the response carries no usable lyrics.
        """
        params = {
            "artist_name": artist,
            "track_name": title,
        }
        if album:
            params["album_name"] = album
        if duration:
            params["duration"] = int(duration)

        try:
            resp = requests.get(f"{self.BASE_URL}/get", params=params, timeout=10)
            if resp.status_code == 404:
                logger.debug(f"LRCLIB: no match for {artist} - {title}")
                return None
            resp.raise_for_status()
            data = resp.json()
        except (requests.RequestException, ValueError) as e:
            logger.warning(f"LRCLIB request failed: {e}")
            return None

        return self._lyrics_from_record(data)

    def search(self, query: str) -> Optional[Lyrics]:
        """
        Search LRCLIB by text query (fuzzy).

        Args:
            query: Free-text search string.

        Returns:
            Lyrics built from the first search hit that actually contains
            lyrics, or None when the request fails or no hit is usable.
        """
        try:
            resp = requests.get(f"{self.BASE_URL}/search", params={"q": query}, timeout=10)
            if resp.status_code != 200:
                return None
            results = resp.json()
        except (requests.RequestException, ValueError) as e:
            logger.debug(f"LRCLIB search failed: {e}")
            return None

        # Anything other than a list of records cannot be iterated safely.
        if not isinstance(results, list):
            return None

        # Skip hits without lyrics instead of giving up on the first one.
        for record in results:
            lyrics = self._lyrics_from_record(record)
            if lyrics is not None:
                return lyrics

        return None

    @staticmethod
    def _lyrics_from_record(record: object) -> Optional[Lyrics]:
        """Convert one LRCLIB JSON record into Lyrics, or None if unusable."""
        if not isinstance(record, dict):
            return None

        synced_lrc = record.get("syncedLyrics")
        # `or ""` also covers a key that is present with a null value.
        plain = record.get("plainLyrics") or ""

        if synced_lrc:
            lines = parse_lrc(synced_lrc)
            # Only trust the synced text if it parsed into at least one line;
            # otherwise fall through to the plain lyrics.
            if lines:
                return Lyrics(
                    plain_text=plain or "\n".join(ln.text for ln in lines),
                    lines=lines,
                    synced=True,
                    source="lrclib",
                )

        if plain:
            lines = [LyricLine(text=ln.strip()) for ln in plain.split("\n") if ln.strip()]
            return Lyrics(plain_text=plain, lines=lines, synced=False, source="lrclib")

        return None
|
|
|
|
class SyncedLyricsFetcher:
    """
    Multi-source fetcher using the syncedlyrics library.
    Tries: Lrclib → NetEase → Musixmatch → Megalobiz
    """

    def fetch(self, artist: str, title: str) -> Optional[Lyrics]:
        """
        Fetch lyrics (synced when available) using multiple providers.

        Args:
            artist: Artist name
            title: Track title

        Returns:
            Lyrics with ``synced=True`` when the result contains LRC time
            tags, Lyrics with ``synced=False`` for plain text, or None when
            the library is missing, the search fails, or nothing usable is
            returned.
        """
        try:
            import syncedlyrics
        except ImportError:
            logger.warning("syncedlyrics not installed. pip install syncedlyrics")
            return None

        query = f"{artist} {title}"
        providers = ["Lrclib", "NetEase", "Musixmatch", "Megalobiz"]
        try:
            try:
                lrc_text = syncedlyrics.search(
                    query,
                    providers=providers,
                    allow_plain_format=True,
                )
            except TypeError:
                # NOTE(review): newer syncedlyrics releases appear to have
                # replaced `allow_plain_format` with `plain_only`/`synced_only`
                # (default: prefer synced, fall back to plain) — confirm
                # against the installed version. Retrying without the keyword
                # keeps this source usable instead of failing on every call.
                lrc_text = syncedlyrics.search(query, providers=providers)
        except Exception as e:
            logger.warning(f"syncedlyrics search failed: {e}")
            return None

        # Treat whitespace-only results like "nothing found".
        if not lrc_text or not lrc_text.strip():
            return None

        # Presence of at least one [MM:SS.cs] tag marks the text as synced LRC.
        if re.search(r"\[\d{2}:\d{2}\.\d{2,3}\]", lrc_text):
            lines = parse_lrc(lrc_text)
            if not lines:
                # Time tags but no lyric text: nothing useful to return,
                # so let the caller try the next source.
                return None
            return Lyrics(
                plain_text="\n".join(ln.text for ln in lines),
                lines=lines,
                synced=True,
                source="syncedlyrics",
            )

        lines = [LyricLine(text=ln.strip()) for ln in lrc_text.split("\n") if ln.strip()]
        return Lyrics(
            plain_text=lrc_text,
            lines=lines,
            synced=False,
            source="syncedlyrics",
        )
|
|
|
|
class GeniusFetcher:
    """
    Fetch plain-text lyrics from Genius.
    Requires API token. No synced/timed lyrics available.
    """

    def __init__(self, token: str):
        """Store the Genius API token used for later requests."""
        self.token = token

    def fetch(self, artist: str, title: str) -> Optional[Lyrics]:
        """
        Fetch lyrics from Genius API.

        Args:
            artist: Artist name
            title: Track title

        Returns:
            Unsynced Lyrics, or None when lyricsgenius is not installed, the
            lookup fails, no song is found, or nothing is left after cleaning.
        """
        try:
            import lyricsgenius
        except ImportError:
            logger.warning("lyricsgenius not installed. pip install lyricsgenius")
            return None

        try:
            genius = lyricsgenius.Genius(self.token, verbose=False)
            genius.remove_section_headers = True
            song = genius.search_song(title, artist)
            if song and song.lyrics:
                text = self._clean_genius_lyrics(song.lyrics)
                lines = [LyricLine(text=ln.strip()) for ln in text.split("\n") if ln.strip()]
                # Cleaning can strip everything; report "not found" rather
                # than returning an empty Lyrics object.
                if lines:
                    return Lyrics(plain_text=text, lines=lines, synced=False, source="genius")
        except Exception as e:
            # Best-effort source: any library/network error means "no result".
            logger.warning(f"Genius fetch failed: {e}")

        return None

    @staticmethod
    def _clean_genius_lyrics(raw: str) -> str:
        """
        Remove Genius-specific formatting.

        Strips bracketed section headers, the trailing "Embed" marker (with
        or without a leading number), "You might also like" inserts, and
        collapses runs of three or more newlines into one blank line.
        """
        # Section headers such as [Chorus]
        text = re.sub(r"\[.*?\]", "", raw)
        # Trailing "123Embed" or bare "Embed" (digits are optional)
        text = re.sub(r"\d*Embed$", "", text)
        text = re.sub(r"You might also like", "", text)
        # Collapse the blank lines left behind by the removals above
        text = re.sub(r"\n{3,}", "\n\n", text)
        return text.strip()
|
|
|
|
def fetch_lyrics(
    artist: str,
    title: str,
    album: Optional[str] = None,
    duration: Optional[float] = None,
    genius_token: Optional[str] = None,
) -> Optional[Lyrics]:
    """
    Try each lyrics source in priority order and return the first hit.

    Order of attempts:
        1. LRCLIB (uses artist, title, album and duration)
        2. syncedlyrics (uses artist and title)
        3. Genius (uses artist and title; only when a token is supplied)

    Args:
        artist: Artist name
        title: Track title
        album: Album name (optional)
        duration: Track duration in seconds (optional)
        genius_token: Genius API token (optional, enables the last fallback)

    Returns:
        The Lyrics from the first source that yields a result, else None.
    """
    # 1) LRCLIB
    if found := LRCLIBFetcher().fetch(artist, title, album, duration):
        logger.info(f"Lyrics from LRCLIB (synced={found.synced}): {len(found.words)} words")
        return found

    # 2) syncedlyrics aggregator
    if found := SyncedLyricsFetcher().fetch(artist, title):
        logger.info(f"Lyrics from syncedlyrics (synced={found.synced}): {len(found.words)} words")
        return found

    # 3) Genius, only attempted when a token was provided
    if genius_token:
        if found := GeniusFetcher(genius_token).fetch(artist, title):
            logger.info(f"Lyrics from Genius (plain): {len(found.words)} words")
            return found

    logger.warning(f"No lyrics found for: {artist} - {title}")
    return None
|
|