"""
Lyrics acquisition from online databases.

Fetches reference (correct) lyrics given artist + title.
Supports synced LRC format and plain text.

Priority sources:
1. LRCLIB (free, no auth, synced LRC available)
2. syncedlyrics library (multi-source aggregator)
3. Genius (plain text fallback, requires API key)
"""

import logging
import re
from dataclasses import dataclass, field
from typing import Optional

import requests

logger = logging.getLogger(__name__)


@dataclass
class LyricLine:
    """One line of a song's lyrics, optionally anchored to a playback time.

    Attributes:
        text: The lyric text of the line.
        timestamp: Start time of the line in seconds when it came from a
            timed (LRC) source; ``None`` for untimed lines.
    """
    text: str
    timestamp: Optional[float] = None  # seconds (from LRC format)

    @property
    def words(self) -> list[str]:
        """Return the whitespace-separated tokens of ``text`` in order."""
        tokens = self.text.split()
        return tokens


@dataclass
class Lyrics:
    """The full lyrics of one song together with provenance information.

    Attributes:
        plain_text: The lyrics as a single block of text.
        lines: The lyrics broken into individual ``LyricLine`` entries.
        synced: True when line-level timestamps are available.
        source: Short label of the provider the lyrics came from.
    """
    plain_text: str
    lines: list[LyricLine] = field(default_factory=list)
    synced: bool = False  # Whether line-level timestamps are available
    source: str = "unknown"

    @property
    def words(self) -> list[str]:
        """Return every whitespace-separated token of ``plain_text``, in order."""
        tokens = self.plain_text.split()
        return tokens

    @property
    def word_count(self) -> int:
        """Return how many tokens ``words`` contains."""
        all_words = self.words
        return len(all_words)


def parse_lrc(lrc_text: str) -> list[LyricLine]:
    """
    Parse LRC format into LyricLine objects.

    Recognised line shapes:
        [MM:SS.cs] Lyrics text here
        [MM:SS.cs][MM:SS.cs] Repeated lyrics text   (one entry per time tag)
        [MM:SS.cs] <MM:SS.cs> word <MM:SS.cs> word  (enhanced LRC)

    The fractional part may have two digits (centiseconds) or three digits
    (milliseconds). Minutes may have more than two digits so that tags past
    the 99-minute mark are still recognised.

    Lines that carry several leading time tags are expanded into one
    ``LyricLine`` per tag. When such an expansion happened, the timed lines
    are put back into chronological order; untimed lines keep their position.
    Input that only uses one tag per line is returned in file order.

    Args:
        lrc_text: Raw LRC document.

    Returns:
        Parsed lines. Timed lines with no text (instrumental markers) and
        bracketed lines without a valid time tag (metadata such as
        ``[ar:Artist]``) are skipped; other untimed lines are kept with
        ``timestamp=None``.
    """
    # One leading time tag plus any whitespace that follows it.
    time_tag = re.compile(r"\[(\d{2,}):(\d{2})\.(\d{2,3})\]\s*")
    # Word-level tag used by enhanced LRC.
    word_tag = re.compile(r"<\d{2}:\d{2}\.\d{2,3}>")

    lines: list[LyricLine] = []
    expanded = False  # True once a multi-tag line has been split up

    for raw_line in lrc_text.strip().split("\n"):
        raw_line = raw_line.strip()
        if not raw_line:
            continue

        # Consume every time tag at the start of the line.
        timestamps: list[float] = []
        pos = 0
        while (tag := time_tag.match(raw_line, pos)) is not None:
            minutes, seconds, fraction = tag.groups()
            # Two digits are centiseconds, three are milliseconds.
            timestamps.append(
                int(minutes) * 60 + int(seconds) + int(fraction) / 10 ** len(fraction)
            )
            pos = tag.end()

        if not timestamps:
            # Non-timestamped line: keep plain text, drop metadata tags
            # like [ar:Artist].
            if not raw_line.startswith("["):
                lines.append(LyricLine(text=raw_line))
            continue

        # Strip enhanced LRC word-level tags if present.
        text = word_tag.sub("", raw_line[pos:].strip()).strip()
        if not text:
            # Empty timed line (instrumental marker).
            continue

        if len(timestamps) > 1:
            expanded = True
        lines.extend(LyricLine(text=text, timestamp=ts) for ts in timestamps)

    if expanded:
        # Repeated lines were listed once with all their times, so the timed
        # entries are no longer chronological. Re-order only the timed
        # entries, leaving untimed ones in their original slots.
        timed = iter(
            sorted(
                (entry for entry in lines if entry.timestamp is not None),
                key=lambda entry: entry.timestamp,
            )
        )
        lines = [
            next(timed) if entry.timestamp is not None else entry
            for entry in lines
        ]

    return lines


class LRCLIBFetcher:
    """
    Fetch lyrics from LRCLIB.net — free, no auth, community-maintained.
    Returns both synced LRC and plain text when available.

    Both public methods are best-effort: network errors, non-JSON bodies and
    unexpected payload shapes result in ``None`` rather than an exception.
    """

    BASE_URL = "https://lrclib.net/api"

    @staticmethod
    def _lyrics_from_record(record: object) -> Optional[Lyrics]:
        """
        Convert one decoded LRCLIB track record into a Lyrics object.

        Args:
            record: Decoded JSON value; expected to be a dict that may carry
                the ``syncedLyrics`` and ``plainLyrics`` keys.

        Returns:
            Synced lyrics when ``syncedLyrics`` parses to at least one line,
            otherwise plain lyrics when ``plainLyrics`` is non-empty,
            otherwise None (including when ``record`` is not a dict).
        """
        if not isinstance(record, dict):
            return None

        synced_lrc = record.get("syncedLyrics")
        plain = record.get("plainLyrics")
        # Either key may be missing or null; normalise to an empty string.
        if not isinstance(plain, str):
            plain = ""

        if isinstance(synced_lrc, str) and synced_lrc:
            lines = parse_lrc(synced_lrc)
            # Only claim synced lyrics when parsing produced something.
            if lines:
                return Lyrics(
                    plain_text=plain or "\n".join(l.text for l in lines),
                    lines=lines,
                    synced=True,
                    source="lrclib",
                )

        if plain:
            lines = [LyricLine(text=line.strip()) for line in plain.split("\n") if line.strip()]
            return Lyrics(plain_text=plain, lines=lines, synced=False, source="lrclib")

        return None

    def fetch(
        self,
        artist: str,
        title: str,
        album: Optional[str] = None,
        duration: Optional[float] = None,
    ) -> Optional[Lyrics]:
        """
        Fetch lyrics by metadata match.

        Args:
            artist: Artist name
            title: Track title
            album: Album name (optional, improves match accuracy)
            duration: Track duration in seconds (optional); sent truncated
                to whole seconds

        Returns:
            Lyrics for the matched track, or None when there is no match,
            the request fails, or the response carries no usable lyrics.
        """
        params = {
            "artist_name": artist,
            "track_name": title,
        }
        if album:
            params["album_name"] = album
        if duration:
            params["duration"] = int(duration)

        try:
            resp = requests.get(f"{self.BASE_URL}/get", params=params, timeout=10)
            if resp.status_code == 404:
                logger.debug(f"LRCLIB: no match for {artist} - {title}")
                return None
            resp.raise_for_status()
            data = resp.json()
        except (requests.RequestException, ValueError) as e:
            logger.warning(f"LRCLIB request failed: {e}")
            return None

        return self._lyrics_from_record(data)

    def search(self, query: str) -> Optional[Lyrics]:
        """
        Search LRCLIB by text query (fuzzy).

        Args:
            query: Free-text search string.

        Returns:
            Lyrics built from the first search result, or None when the
            request fails, the status is not 200, the result list is empty
            or malformed, or the first result has no usable lyrics.
        """
        try:
            resp = requests.get(f"{self.BASE_URL}/search", params={"q": query}, timeout=10)
            if resp.status_code != 200:
                return None
            results = resp.json()
        except (requests.RequestException, ValueError) as e:
            logger.debug(f"LRCLIB search failed: {e}")
            return None

        # Guard against an unexpected payload shape (e.g. an error object).
        if not isinstance(results, list) or not results:
            return None

        # Take best result
        return self._lyrics_from_record(results[0])


class SyncedLyricsFetcher:
    """
    Multi-source fetcher using the syncedlyrics library.
    Tries: Lrclib → NetEase → Musixmatch → Megalobiz
    """

    def fetch(self, artist: str, title: str) -> Optional[Lyrics]:
        """
        Fetch synced lyrics using multiple providers.

        The ``syncedlyrics`` package is imported lazily so that it stays an
        optional dependency.

        Args:
            artist: Artist name
            title: Track title

        Returns:
            Synced lyrics when the result contains LRC time tags, plain
            lyrics otherwise, or None when the package is missing, the
            search fails, or nothing usable is returned.
        """
        try:
            import syncedlyrics
        except ImportError:
            logger.warning("syncedlyrics not installed. pip install syncedlyrics")
            return None

        query = f"{artist} {title}"
        providers = ["Lrclib", "NetEase", "Musixmatch", "Megalobiz"]
        try:
            try:
                lrc_text = syncedlyrics.search(
                    query,
                    providers=providers,
                    allow_plain_format=True,
                )
            except TypeError:
                # NOTE(review): newer syncedlyrics releases appear to have
                # replaced `allow_plain_format` with `plain_only` /
                # `synced_only`, which makes the call above raise TypeError.
                # Retry with the default behaviour — confirm against the
                # installed version that it still falls back to plain text.
                lrc_text = syncedlyrics.search(query, providers=providers)
        except Exception as e:
            logger.warning(f"syncedlyrics search failed: {e}")
            return None

        if not lrc_text:
            return None

        # Check if it's LRC format (has timestamps)
        if re.search(r"\[\d{2}:\d{2}\.\d{2,3}\]", lrc_text):
            lines = parse_lrc(lrc_text)
            if not lines:
                # Only empty/instrumental markers: let the caller fall back
                # to another source instead of returning empty lyrics.
                return None
            return Lyrics(
                plain_text="\n".join(l.text for l in lines),
                lines=lines,
                synced=True,
                source="syncedlyrics",
            )

        lines = [LyricLine(text=l.strip()) for l in lrc_text.split("\n") if l.strip()]
        if not lines:
            # Whitespace-only result carries no lyrics.
            return None
        return Lyrics(
            plain_text=lrc_text,
            lines=lines,
            synced=False,
            source="syncedlyrics",
        )


class GeniusFetcher:
    """
    Fetch plain-text lyrics from Genius.
    Requires API token. No synced/timed lyrics available.
    """

    def __init__(self, token: str):
        """
        Args:
            token: Genius API token passed to ``lyricsgenius.Genius``.
        """
        self.token = token

    def fetch(self, artist: str, title: str) -> Optional[Lyrics]:
        """
        Fetch lyrics from Genius API.

        The ``lyricsgenius`` package is imported lazily so that it stays an
        optional dependency. Any error raised by it is logged and treated as
        "no lyrics".

        Args:
            artist: Artist name
            title: Track title

        Returns:
            Unsynced Lyrics, or None when the package is missing, the lookup
            fails, no song is found, or nothing is left after cleaning.
        """
        try:
            import lyricsgenius
        except ImportError:
            logger.warning("lyricsgenius not installed. pip install lyricsgenius")
            return None

        try:
            genius = lyricsgenius.Genius(self.token, verbose=False)
            genius.remove_section_headers = True
            song = genius.search_song(title, artist)
            if song and song.lyrics:
                # Clean up Genius formatting artifacts
                text = self._clean_genius_lyrics(song.lyrics)
                lines = [LyricLine(text=l.strip()) for l in text.split("\n") if l.strip()]
                if lines:
                    return Lyrics(plain_text=text, lines=lines, synced=False, source="genius")
        except Exception as e:
            # Best-effort boundary around a third-party client.
            logger.warning(f"Genius fetch failed: {e}")

        return None

    @staticmethod
    def _clean_genius_lyrics(raw: str) -> str:
        """
        Remove Genius-specific formatting.

        Strips, in order: a leading contributor header, bracketed section
        headers, a trailing "Embed" marker, the "You might also like"
        insert, and runs of three or more newlines.

        Args:
            raw: Lyrics text as returned by lyricsgenius.

        Returns:
            The cleaned text with surrounding whitespace removed.
        """
        text = raw.strip()
        # Remove a leading header of the form "12 Contributors...Song Title Lyrics".
        # NOTE(review): header shape is from observed Genius scrapes — confirm
        # against current lyricsgenius output. Only the first line is touched.
        text = re.sub(r"^\d+\s*Contributors?.*?Lyrics", "", text, count=1)
        # Remove section headers like [Chorus], [Verse 1]
        text = re.sub(r"\[.*?\]", "", text)
        # Remove the "Embed" suffix, with or without a leading count and
        # regardless of trailing whitespace.
        text = re.sub(r"\d*Embed\s*$", "", text)
        text = re.sub(r"You might also like", "", text)
        # Clean up multiple blank lines
        text = re.sub(r"\n{3,}", "\n\n", text)
        return text.strip()


def fetch_lyrics(
    artist: str,
    title: str,
    album: Optional[str] = None,
    duration: Optional[float] = None,
    genius_token: Optional[str] = None,
) -> Optional[Lyrics]:
    """
    Look up lyrics for a track, trying each provider in turn.

    Providers are consulted in this order and the first hit wins:
    1. LRCLIB (free, synced, no auth)
    2. syncedlyrics (multi-source, synced)
    3. Genius (plain text, only tried when a token is supplied)

    Args:
        artist: Artist name
        title: Track title
        album: Album name (optional, forwarded to LRCLIB only)
        duration: Track duration in seconds (optional, forwarded to LRCLIB only)
        genius_token: Genius API token (optional, enables the Genius fallback)

    Returns:
        Lyrics object from the first provider that returned one, or None
        when every provider came back empty.
    """
    # First choice: LRCLIB metadata lookup.
    found = LRCLIBFetcher().fetch(artist, title, album, duration)
    if found:
        logger.info(f"Lyrics from LRCLIB (synced={found.synced}): {len(found.words)} words")
        return found

    # Second choice: the syncedlyrics aggregator.
    found = SyncedLyricsFetcher().fetch(artist, title)
    if found:
        logger.info(f"Lyrics from syncedlyrics (synced={found.synced}): {len(found.words)} words")
        return found

    # Last resort: Genius plain text, which needs credentials.
    if genius_token:
        found = GeniusFetcher(genius_token).fetch(artist, title)
        if found:
            logger.info(f"Lyrics from Genius (plain): {len(found.words)} words")
            return found

    logger.warning(f"No lyrics found for: {artist} - {title}")
    return None