"""
Text utilities for the Puja Verification Service.

Provides LLM-powered translation of:
  - English names / DOB / gotra → Hindi  (via Groq)
  - Sanskrit transcript text   → Hindi  (via Groq)

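A lightweight rule-based Latin→Devanagari transliteration is also provided
(see the legacy helpers below), but the LLM translators do not switch to it
automatically: if the Groq client is unavailable, the error propagates to
the caller.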
"""

import re
import json
import logging

from groq import Groq
from app.config import GROQ_API_KEY

logger = logging.getLogger(__name__)

# ── Groq client (shared with llm_matching_service) ──────────────────────────
_groq_client: Groq | None = None


def _get_groq_client() -> Groq:
    """Return the module-wide Groq client, building it on first use.

    The instance is cached in the module-level ``_groq_client`` so that
    subsequent calls reuse it.

    Raises:
        RuntimeError: if ``GROQ_API_KEY`` is empty or unset.
    """
    global _groq_client

    # Fast path: a client was already created by an earlier call.
    if _groq_client is not None:
        return _groq_client

    if not GROQ_API_KEY:
        raise RuntimeError("GROQ_API_KEY is not set")

    _groq_client = Groq(api_key=GROQ_API_KEY)
    return _groq_client


# ── ASCII → Devanagari digit table (for use with ``str.translate``) ─────────
DEVANAGARI_DIGITS = {
    ord(ascii_digit): ord(devanagari_digit)
    for ascii_digit, devanagari_digit in zip("0123456789", "०१२३४५६७८९")
}


# ─────────────────────────────────────────────────────────────────────────────
#  LLM-based English → Hindi translation
# ─────────────────────────────────────────────────────────────────────────────

def _translate_english_to_hindi(text: str) -> str:
    """
    Translate an English string (name, date-of-birth, or gotra) into Hindi
    (Devanagari script) using the Groq LLM.

    Proper nouns are transliterated phonetically.  Dates keep their layout
    and have their digits converted to Devanagari numerals.

    Args:
        text: English input.  Empty / whitespace-only input is returned
            unchanged.  Input made up only of ASCII digits and date
            separators is converted locally, without calling the LLM.

    Returns:
        The Hindi string, or the original ``text`` when the LLM returns no
        usable content.

    Raises:
        RuntimeError: if ``GROQ_API_KEY`` is not configured.  Errors raised
            by the Groq client itself are not caught here.
    """
    if not text or not text.strip():
        return text

    # Purely numeric input (e.g. "15/01/1995") only needs its digits mapped,
    # which is deterministic — no reason to pay for a network round-trip or
    # to risk the model rewriting the date.
    if re.fullmatch(r"[0-9\s/.,:\-]+", text) and re.search(r"[0-9]", text):
        return text.strip().translate(DEVANAGARI_DIGITS)

    prompt = (
        "You are a professional English-to-Hindi translator. "
        "Translate the following text into Hindi (Devanagari script). "
        "Rules:\n"
        "- For proper nouns (person names, gotra names), transliterate them "
        "  phonetically into Devanagari (e.g., 'Rahul Sharma' → 'राहुल शर्मा').\n"
        "- For dates, convert to Hindi format with Devanagari numerals "
        "  (e.g., '15 January 1995' → '१५ जनवरी १९९५', '15/01/1995' → '१५/०१/१९९५').\n"
        "- Return ONLY the translated Hindi text, nothing else.\n\n"
        f"Text: {text}"
    )

    client = _get_groq_client()
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        temperature=0,
        max_tokens=256,
        messages=[
            {"role": "system", "content": "You are a translator. Return only the Hindi translation, no explanation."},
            {"role": "user", "content": prompt},
        ],
    )

    # Treat an empty ``choices`` list like an empty completion so it takes
    # the same "return the original text" path instead of raising IndexError.
    choices = response.choices
    content = choices[0].message.content if choices else None
    result = (content or "").strip()

    # Strip one pair of surrounding quotes the LLM sometimes adds (straight
    # or typographic), then any whitespace that was inside the quotes.
    quote_pairs = (('"', '"'), ("'", "'"), ("“", "”"), ("‘", "’"))
    for opening, closing in quote_pairs:
        if result.startswith(opening) and result.endswith(closing):
            result = result[1:-1].strip()
            break

    return result if result else text


def convert_api_fields_to_hindi(name: str) -> dict:
    """
    Translate the raw (English) API input fields into Hindi via the LLM.

    Args:
        name: The name as received from the API.

    Returns:
        A dict with a single key, ``name_hindi``, holding the translation.
    """
    translated = {"name_hindi": _translate_english_to_hindi(name)}

    logger.info(
        "Translated fields → name: %s → %s",
        name, translated["name_hindi"]
    )

    return translated


# ─────────────────────────────────────────────────────────────────────────────
#  LLM-based Sanskrit → Hindi translation
# ─────────────────────────────────────────────────────────────────────────────

def sanskrit_to_hindi(text: str) -> str:
    """
    Translate a Sanskrit transcript into fluent Hindi using the Groq LLM.

    The prompt tells the model to leave proper nouns, dates and numbers
    untouched so that the downstream matcher can still search the result
    for name / DOB / gotra occurrences.

    Args:
        text: Sanskrit transcript.  Empty / whitespace-only input is
            returned unchanged without calling the LLM.

    Returns:
        The Hindi translation, or the original ``text`` when the LLM returns
        no usable content.  A translation cut off by the token limit is
        still returned, but a warning is logged.

    Raises:
        RuntimeError: if ``GROQ_API_KEY`` is not configured.  Errors raised
            by the Groq client itself are not caught here.
    """
    if not text or not text.strip():
        return text

    prompt = (
        "You are an expert Sanskrit-to-Hindi translator. "
        "Translate the following Sanskrit text into clear, natural Hindi. "
        "Rules:\n"
        "- Keep all proper nouns (person names, gotra names, deity names) exactly as-is in Devanagari.\n"
        "- Keep dates and numbers exactly as-is.\n"
        "- Translate the rest into simple, fluent Hindi.\n"
        "- Return ONLY the translated Hindi text, no explanation or commentary.\n\n"
        f"Sanskrit text:\n{text}"
    )

    max_tokens = 2048
    client = _get_groq_client()
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        temperature=0,
        max_tokens=max_tokens,
        messages=[
            {"role": "system", "content": "You are a Sanskrit-Hindi translator. Return only the Hindi translation."},
            {"role": "user", "content": prompt},
        ],
    )

    # Treat an empty ``choices`` list like an empty completion so it takes
    # the same "return the original text" path instead of raising IndexError.
    first_choice = response.choices[0] if response.choices else None
    content = first_choice.message.content if first_choice is not None else None
    result = (content or "").strip()

    # The completion is capped at ``max_tokens``.  A "length" finish reason
    # means the output was cut off, so the tail of the transcript is missing
    # from the translation — surface that instead of failing silently.
    if getattr(first_choice, "finish_reason", None) == "length":
        logger.warning(
            "Sanskrit→Hindi translation truncated at max_tokens=%d (%d input chars)",
            max_tokens, len(text),
        )

    logger.info("Sanskrit→Hindi translation complete (%d chars → %d chars)", len(text), len(result))
    return result if result else text


# ─────────────────────────────────────────────────────────────────────────────
#  Legacy helpers (kept for backward compatibility)
# ─────────────────────────────────────────────────────────────────────────────

# One Devanagari replacement per lowercase Latin letter, listed in a–z order.
# Several letters share a target (c/k/q → क, j/z → ज, v/w → व).
LATIN_TO_DEVANAGARI = dict(zip(
    "abcdefghijklmnopqrstuvwxyz",
    (
        "अ", "ब", "क", "द", "ए", "फ", "ग", "ह", "इ", "ज", "क", "ल", "म",
        "न", "ओ", "प", "क", "र", "स", "त", "उ", "व", "व", "क्स", "य", "ज",
    ),
))


def _to_devanagari(text: str) -> str:
    """Transliterate Latin letters and ASCII digits to Devanagari (legacy).

    Each character is looked up case-insensitively in
    ``LATIN_TO_DEVANAGARI``; characters without an entry pass through
    unchanged.  ASCII digits are then mapped via ``DEVANAGARI_DIGITS``.
    """
    letters_mapped = "".join(
        LATIN_TO_DEVANAGARI.get(ch.lower(), ch) for ch in text
    )
    return letters_mapped.translate(DEVANAGARI_DIGITS)


def build_sanskrit_details(name: str) -> str:
    """Return a ``नाम: …`` line with *name* run through the rule-based transliterator."""
    return "नाम: " + _to_devanagari(name)


def build_hindi_details(name: str) -> str:
    """Return a ``नाम: …`` line with *name* translated to Hindi (uses the LLM)."""
    hindi_name = convert_api_fields_to_hindi(name)["name_hindi"]
    return f"नाम: {hindi_name}"


def normalize_text(text: str, language: str = "en") -> str:
    """Strip characters that are irrelevant for matching in *language*.

    Args:
        text: The string to normalise.
        language: ``"en"`` lowercases and keeps only ``a-z``, ``0-9`` and
            whitespace; ``"sa"`` keeps only the Devanagari block
            (U+0900–U+097F) and whitespace.  Any other value leaves
            ``text`` untouched.

    Returns:
        The normalised string.
    """
    if language == "sa":
        # Keep only Devanagari characters and whitespace.
        return re.sub(r'[^\u0900-\u097F\s]', '', text)

    if language != "en":
        return text

    return re.sub(r'[^a-z0-9\s]', '', text.lower())