import os
import json
import asyncio
import re
from pathlib import Path
from datetime import datetime
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import JSONResponse, FileResponse
import edge_tts
from groq import Groq
from google import genai

app = FastAPI(title="Voice Call AI Bridge Backend")

# All artifacts live under ./data next to this file:
#   recordings/ — raw uploaded call audio
#   responses/  — synthesized MP3 replies
#   logs/       — one JSON document per call
BASE_DIR = Path(__file__).resolve().parent
DATA_DIR = BASE_DIR / "data"
RECORDINGS_DIR = DATA_DIR / "recordings"
RESPONSES_DIR = DATA_DIR / "responses"
LOGS_DIR = DATA_DIR / "logs"
for d in (RECORDINGS_DIR, RESPONSES_DIR, LOGS_DIR):
    d.mkdir(parents=True, exist_ok=True)

# ================================================================
# DYNAMIC API KEYS (unlimited)
# ================================================================
# Keys are read from numbered env vars (GROQ_API_KEY_1, GROQ_API_KEY_2, ...)
# until the first gap; callers rotate through the list on failure.
GROQ_API_KEYS = []
i = 1
while True:
    key = os.getenv(f"GROQ_API_KEY_{i}")
    if not key:
        # also try plain GROQ_API_KEY for backward compat
        key = os.getenv("GROQ_API_KEY") if i == 1 else None
    if not key:
        break
    GROQ_API_KEYS.append(key)
    i += 1

GEMINI_API_KEYS = []
i = 1
while True:
    key = os.getenv(f"GEMINI_API_KEY_{i}")
    if not key:
        break
    GEMINI_API_KEYS.append(key)
    i += 1

GROQ_CHAT_MODEL = os.getenv("GROQ_CHAT_MODEL", "llama-3.3-70b-versatile")
GROQ_STT_MODEL = "whisper-large-v3"  # large-v3 has better Indian language support than turbo
GEMINI_MODEL = "gemini-2.0-flash"

# Whisper language codes — telling Whisper the language upfront fixes accuracy dramatically
WHISPER_LANG_CODES = {
    "gujarati": "gu",
    "hindi": "hi",
    "english": "en",
}
# App sends language hint from user's phone locale/preference
# Fallback: transcribe twice (once with gu, once auto) and pick longer result

print(f"Loaded {len(GROQ_API_KEYS)} Groq key(s), {len(GEMINI_API_KEYS)} Gemini key(s).")

# ================================================================
# MICROSOFT EDGE NEURAL VOICES (free, no API key, excellent quality)
# ================================================================
EDGE_VOICES = {
    "English": "en-IN-NeerjaNeural",   # Indian English, natural
    "Hindi": "hi-IN-SwaraNeural",      # Hindi female, very smooth
    "Gujarati": "gu-IN-DhwaniNeural",  # Gujarati female, native quality
}

# ================================================================
# LANGUAGE DETECTION
# ================================================================
def detect_language(text: str) -> str:
    """Detect the reply language from the script of *text*.

    Checks each character against the Gujarati (U+0A80–U+0AFF) and
    Devanagari (U+0900–U+097F) Unicode ranges; the first match wins.
    Anything else (including empty text) falls back to "English".
    """
    for char in text:
        if '\u0A80' <= char <= '\u0AFF':
            return "Gujarati"
        if '\u0900' <= char <= '\u097F':
            return "Hindi"
    return "English"

# ================================================================
# STT: GROQ WHISPER (tries all keys)
# ================================================================
def transcribe_audio(path: Path, language_hint: str | None = None) -> str:
    """
    Transcribe audio with Whisper.
    language_hint: "gujarati", "hindi", or "english" — dramatically improves accuracy.
    If not provided, tries Gujarati + Hindi + auto and picks the best result.

    Returns the transcript string, or "" if every configured Groq key failed.
    Rotates through GROQ_API_KEYS on per-key failure.
    """
    if not GROQ_API_KEYS:
        return "(transcription skipped: no GROQ_API_KEY configured)"

    def _transcribe_with_lang(key: str, lang_code: str | None = None) -> str:
        # One Whisper call with an optional explicit language code.
        client = Groq(api_key=key)
        with path.open("rb") as audio:
            kwargs = dict(
                model=GROQ_STT_MODEL,
                file=audio,
                response_format="verbose_json",
            )
            if lang_code:
                kwargs["language"] = lang_code  # explicit language = much better accuracy
            result = client.audio.transcriptions.create(**kwargs)
        # SDK may return an object with .text or a plain dict — handle both.
        text = getattr(result, "text", None) or (result.get("text", "") if isinstance(result, dict) else "")
        return text.strip()

    for index, key in enumerate(GROQ_API_KEYS):
        try:
            if language_hint:
                # User told us the language — use it directly
                lang_code = WHISPER_LANG_CODES.get(language_hint.lower())
                text = _transcribe_with_lang(key, lang_code)
                print(f"[STT] Key #{index+1} ({language_hint}): {text[:60]}")
                return text
            else:
                # No hint — try Gujarati, Hindi, and auto; pick longest meaningful result
                results = {}
                for lang_name, lang_code in WHISPER_LANG_CODES.items():
                    try:
                        t = _transcribe_with_lang(key, lang_code)
                        if t and t not in [".", "", " "]:
                            results[lang_name] = t
                            print(f"[STT] {lang_name} attempt: {t[:50]}")
                    except Exception:
                        # Best-effort: a failed per-language attempt just drops out
                        # of the candidate set; remaining attempts continue.
                        pass
                if not results:
                    continue

                def has_gujarati_script(t):
                    # Any character in the Gujarati Unicode block U+0A80–U+0AFF.
                    return any('઀' <= c <= '૿' for c in t)

                def has_hindi_script(t):
                    # Any character in the Devanagari Unicode block U+0900–U+097F.
                    return any('ऀ' <= c <= 'ॿ' for c in t)

                def is_transliterated_english(gujarati_text, english_text):
                    """
                    Detect if Whisper just wrote English words in Gujarati script.
                    Strategy: count how many English words appear phonetically in Gujarati text.
                    Common English loanwords in Gujarati script are a giveaway.
                    Also: if English result has meaningful words and Gujarati has same word count,
                    it's likely transliteration.
                    """
                    # Common English words that Whisper writes in Gujarati script when confused
                    english_in_gujarati_markers = [
                        'એન', 'ધ', 'ઈન', 'ઈઝ', 'ઓફ', 'ટો', 'એ', 'કેન', 'યુ', 'આઈ',
                        'વી', 'ઓહ', 'હાઈ', 'ઓકે', 'યસ', 'નો', 'હેલો', 'ટોક', 'સ્પીક',
                        'ઈટ', 'માય', 'યોર', 'ઈઝ'
                    ]
                    marker_count = sum(1 for m in english_in_gujarati_markers if m in gujarati_text)
                    # If 2+ English markers found in Gujarati text, it's transliteration
                    if marker_count >= 2:
                        return True
                    # If english result is meaningful and gujarati word count matches english
                    guj_words = len(gujarati_text.split())
                    eng_words = len(english_text.split())
                    if english_text and abs(guj_words - eng_words) <= 1 and eng_words > 1:
                        # Same number of words = same sentence just transliterated
                        return True
                    return False

                gu_text = results.get("gujarati", "")
                hi_text = results.get("hindi", "")
                en_text = results.get("english", "")
                # Preference order: genuine Gujarati > genuine Hindi > English/fallback.
                if gu_text and has_gujarati_script(gu_text) and not is_transliterated_english(gu_text, en_text):
                    best = gu_text
                    print(f"[STT] Real Gujarati detected: {best[:60]}")
                elif hi_text and has_hindi_script(hi_text):
                    best = hi_text
                    print(f"[STT] Real Hindi detected: {best[:60]}")
                else:
                    best = en_text or gu_text or list(results.values())[0]
                    print(f"[STT] English/fallback selected: {best[:60]}")
                return best
        except Exception as e:
            # Key-level failure (auth, quota, network) — rotate to the next key.
            print(f"[STT] Key #{index+1} failed: {e}")
            continue
    return ""
# ================================================================
# AI: GROQ → GEMINI FALLBACK
# ================================================================
def build_prompt(user_text: str, language: str) -> str:
    """Build a chat prompt that forces a short, spoken-style reply in *language*.

    The constraints (2-3 sentences, no markdown/lists) exist because the
    reply is synthesized to speech and played back during a phone call.
    """
    return (
        f"You are a helpful voice assistant for rural villagers in India.\n"
        f"You help with: farming tips, crop care, weather advice, government schemes, general questions.\n"
        f"STRICT RULES:\n"
        f"- Detect and reply ONLY in {language}. Use correct script.\n"
        f"- Reply must be SHORT — this is a PHONE CALL. Maximum 2-3 sentences.\n"
        f"- NO bullet points, NO lists, NO markdown. Speak naturally.\n"
        f"- Be warm, simple, and clear for a rural farmer.\n"
        f"\nUser said: {user_text}\n"
        f"Your spoken reply ({language}, 2-3 sentences max):"
    )


def try_groq_chat(prompt: str) -> str | None:
    """Request a completion from Groq, rotating through all configured keys.

    Returns the reply text on first success, or None if every key fails
    (rate limit or other error). Errors are logged, never raised.
    """
    for index, key in enumerate(GROQ_API_KEYS):
        try:
            client = Groq(api_key=key)
            resp = client.chat.completions.create(
                model=GROQ_CHAT_MODEL,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=120,   # keeps replies phone-call short
                temperature=0.4,
            )
            result = resp.choices[0].message.content.strip()
            print(f"[AI/Groq] Key #{index+1} success")
            return result
        except Exception as e:
            if "429" in str(e) or "rate" in str(e).lower():
                print(f"[AI/Groq] Key #{index+1} rate limited, trying next...")
            else:
                print(f"[AI/Groq] Key #{index+1} error: {e}")
            continue
    return None


def try_gemini_chat(prompt: str) -> str | None:
    """Request a completion from Gemini, rotating through all configured keys.

    Used as the fallback when every Groq key is exhausted.
    Returns the reply text, or None if every key fails.
    """
    for index, key in enumerate(GEMINI_API_KEYS):
        try:
            client = genai.Client(api_key=key)
            resp = client.models.generate_content(model=GEMINI_MODEL, contents=prompt)
            print(f"[AI/Gemini] Key #{index+1} success")
            return resp.text.strip()
        except Exception as e:
            if "429" in str(e) or "quota" in str(e).lower():
                print(f"[AI/Gemini] Key #{index+1} quota exceeded, trying next...")
            else:
                print(f"[AI/Gemini] Key #{index+1} error: {e}")
            continue
    return None


def generate_reply(user_text: str, language: str) -> str:
    """Generate a short spoken reply: Groq first, Gemini fallback, canned apology last.

    The result is stripped of markdown characters and newlines so it reads
    naturally when fed to TTS.
    """
    prompt = build_prompt(user_text, language)
    result = try_groq_chat(prompt) or try_gemini_chat(prompt)
    if not result:
        # Both providers failed — apologize in the caller's language.
        return {
            "English": "Sorry, I could not process your request. Please try again.",
            "Hindi": "माफ करें, अभी जवाब देने में असमर्थ हूँ। कृपया दोबारा कोशिश करें।",
            "Gujarati": "માફ કરશો, હું હમણાં જવાબ આપી શકતો નથી. ફરી પ્રયાસ કરો."
        }.get(language, "Sorry, unable to respond now.")
    # Clean for speech
    result = re.sub(r"[*_`#]", "", result)
    result = result.replace("\n", " ").strip()
    return result


# ================================================================
# TTS: MICROSOFT EDGE NEURAL (free, excellent Gujarati/Hindi)
# ================================================================
async def synthesize_tts(text: str, language: str, output_path: Path) -> None:
    """Async TTS — must be awaited. Works correctly inside FastAPI/uvicorn."""
    # Unknown languages fall back to the Indian-English voice.
    voice = EDGE_VOICES.get(language, EDGE_VOICES["English"])
    print(f"[TTS] Using voice: {voice} for {language}")
    communicate = edge_tts.Communicate(text, voice)
    await communicate.save(str(output_path))


# ================================================================
# HELPERS
# ================================================================
def now_id() -> str:
    """Return a UTC timestamp call id: YYYYMMDD_HHMMSS_ffffff (microseconds)."""
    # NOTE(review): datetime.utcnow() is deprecated in 3.12; keeping it here to
    # avoid changing the id format — revisit with datetime.now(timezone.utc).
    return datetime.utcnow().strftime("%Y%m%d_%H%M%S_%f")


# call_id values are always produced by now_id(); reject anything else coming
# from a URL so it cannot be used to reach arbitrary files on disk.
_CALL_ID_RE = re.compile(r"^\d{8}_\d{6}_\d{6}$")


# ================================================================
# ENDPOINTS
# ================================================================
@app.get("/health")
def health():
    """Liveness/config probe: key counts, TTS engine, supported languages."""
    return {
        "ok": True,
        "groq_keys": len(GROQ_API_KEYS),
        "gemini_keys": len(GEMINI_API_KEYS),
        "tts_engine": "Microsoft Edge Neural TTS",
        "voices": EDGE_VOICES,
        "languages": ["English", "Hindi", "Gujarati"]
    }


@app.post("/calls/upload")
async def upload_call(
    phone: str = Form(default=""),
    device_id: str = Form(default="android"),
    meta: str = Form(default="{}"),
    language_hint: str = Form(default=""),  # "gujarati", "hindi", "english" or ""
    audio_file: UploadFile = File(...),
):
    """
    React Native app sends:
    - audio_file: the recorded call audio (m4a/wav/mp3)
    - phone: caller's number
    - device_id: your gateway device ID
    - meta: any extra JSON metadata
    Returns:
    - transcript: what user said
    - reply_text: AI response
    - reply_audio_url: URL to fetch the MP3 voice response
    - language: detected language
    """
    call_id = now_id()

    # Parse metadata up front so malformed client JSON fails cheaply here,
    # instead of 500-ing after the expensive STT -> AI -> TTS pipeline.
    try:
        meta_obj = json.loads(meta or "{}")
    except json.JSONDecodeError:
        meta_obj = {"raw": meta}

    # Save uploaded audio (keep the client's extension when present)
    ext = Path(audio_file.filename or "call.m4a").suffix or ".m4a"
    raw_audio_path = RECORDINGS_DIR / f"{call_id}{ext}"
    raw_audio_path.write_bytes(await audio_file.read())
    print(f"[{call_id}] Audio saved: {raw_audio_path}")

    # Step 1: Transcribe with Groq Whisper
    transcript = transcribe_audio(raw_audio_path, language_hint=language_hint or None)
    print(f"[{call_id}] Transcript: {transcript}")

    # Step 2: Detect language (empty transcript defaults to Hindi)
    language = detect_language(transcript) if transcript else "Hindi"
    print(f"[{call_id}] Language: {language}")

    # Step 3: Generate AI reply
    ai_text = generate_reply(transcript or "Hello", language)
    print(f"[{call_id}] AI Reply: {ai_text}")

    # Step 4: Convert to speech with Edge TTS
    response_mp3 = RESPONSES_DIR / f"{call_id}.mp3"
    await synthesize_tts(ai_text, language, response_mp3)

    # Step 5: Save log
    log_item = {
        "call_id": call_id,
        "created_at": datetime.utcnow().isoformat() + "Z",
        "phone": phone,
        "device_id": device_id,
        "language": language,
        "meta": meta_obj,
        "audio_path": str(raw_audio_path),
        "transcript": transcript,
        "reply_text": ai_text,
        "reply_audio_path": str(response_mp3),
    }
    (LOGS_DIR / f"{call_id}.json").write_text(
        json.dumps(log_item, indent=2, ensure_ascii=False), encoding="utf-8"
    )

    return JSONResponse({
        "call_id": call_id,
        "transcript": transcript,
        "reply_text": ai_text,
        "reply_audio_url": f"/calls/response/{call_id}",
        "language": language,
        "language_hint": language_hint or "auto-detected",
        "stt_model": GROQ_STT_MODEL,
    })


@app.get("/calls/response/{call_id}")
def get_response(call_id: str):
    """React Native app fetches this MP3 and plays it during the call."""
    # Reject ids that don't match now_id()'s format — prevents path tricks.
    if not _CALL_ID_RE.fullmatch(call_id):
        return JSONResponse({"error": "not_found"}, status_code=404)
    mp3 = RESPONSES_DIR / f"{call_id}.mp3"
    if not mp3.exists():
        return JSONResponse({"error": "not_found"}, status_code=404)
    return FileResponse(mp3, media_type="audio/mpeg", filename=f"{call_id}.mp3")


@app.get("/calls/logs")
def get_logs(limit: int = 20):
    """View recent call logs (newest first); corrupt log files are skipped."""
    items = []
    for log_file in sorted(LOGS_DIR.glob("*.json"), reverse=True)[:limit]:
        try:
            items.append(json.loads(log_file.read_text(encoding="utf-8")))
        except (json.JSONDecodeError, OSError):
            continue  # one bad file should not break the whole listing
    return items


@app.get("/calls/logs/{call_id}")
def get_log(call_id: str):
    """View log for a specific call."""
    # Same id validation as get_response — never build paths from raw input.
    if not _CALL_ID_RE.fullmatch(call_id):
        return JSONResponse({"error": "not_found"}, status_code=404)
    log_file = LOGS_DIR / f"{call_id}.json"
    if not log_file.exists():
        return JSONResponse({"error": "not_found"}, status_code=404)
    return json.loads(log_file.read_text(encoding="utf-8"))