import io
import math

import langdetect
import soundfile as sf
import torch
import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse, JSONResponse
from kokoro import KPipeline

app = FastAPI()

# langdetect's algorithm is non-deterministic by default: the same short or
# ambiguous text can be classified differently on successive calls. Fixing the
# seed makes every detect() call for a given text return the same language, so
# the voice and the pipeline chosen for one request cannot disagree.
langdetect.DetectorFactory.seed = 0

# ✅ Pre-load both pipelines at startup so there's no delay mid-conversation
pipeline_en = KPipeline(lang_code='a')   # English / Hinglish
pipeline_hi = KPipeline(lang_code='h')   # Hindi

def detect_pipeline(text: str) -> KPipeline:
    """Choose the Kokoro pipeline that matches the language of *text*.

    Text detected as Hindi ('hi') is routed to the Hindi pipeline. Every
    other detected language (English, Hinglish, ...) and any detection
    failure falls back to the English pipeline.
    """
    try:
        is_hindi = langdetect.detect(text) == 'hi'
    except Exception:
        # Detection is best-effort: treat any failure as "not Hindi".
        is_hindi = False
    return pipeline_hi if is_hindi else pipeline_en

def detect_voice(text: str) -> str:
    """Return the Kokoro voice name to use for the language of *text*.

    "hf_alpha" is returned when the text is detected as Hindi ('hi');
    "af_heart" is returned for every other language and whenever
    detection fails.
    """
    english_voice = "af_heart"
    try:
        detected = langdetect.detect(text)
    except Exception:
        # Detection is best-effort: fall back to the English voice.
        return english_voice
    return "hf_alpha" if detected == 'hi' else english_voice

# Health checks
@app.get("/")
@app.get("/v1")
async def root():
    """Health check served on both "/" and "/v1".

    Returns a small JSON document with the service status, the service
    name and the list of supported language codes.
    """
    supported_languages = ["en", "hi"]
    return {
        "status": "ok",
        "service": "kokoro-tts",
        "languages": supported_languages,
    }

# Main TTS endpoint
@app.post("/v1/audio/speech")
async def tts(request: Request):
    """Synthesize the ``input`` text of the JSON body and return MP3 audio.

    The voice is chosen automatically from the detected language of the
    text, and the pipeline is derived from that voice so the two always
    match.

    Responses:
        200: ``audio/mpeg`` body containing the audio of *all* chunks the
             pipeline produced, encoded as one MP3 stream.
        400: ``{"error": "No input text"}`` when ``input`` is missing,
             not a string, or blank.
        500: ``{"error": ...}`` when synthesis fails or yields no audio.
    """
    try:
        data = await request.json()
        text = data.get("input", "")

        if not isinstance(text, str) or not text.strip():
            return JSONResponse({"error": "No input text"}, status_code=400)

        # The voice is auto-selected from the detected language. A "voice"
        # field in the request body is NOT honoured at the moment.
        voice = detect_voice(text)
        # Derive the pipeline from the chosen voice instead of running a
        # second language detection: Kokoro voice names start with their
        # language code ('h' = Hindi), so this keeps voice and pipeline
        # consistent even when detection is ambiguous.
        pipeline = pipeline_hi if voice.startswith("h") else pipeline_en

        out = io.BytesIO()
        wrote_audio = False
        # The pipeline yields one result per text chunk. Append every chunk
        # to a single encoder so long inputs are not cut off after the
        # first chunk.
        with sf.SoundFile(out, mode='w', samplerate=24000, channels=1,
                          format='mp3') as encoder:
            for _, _, audio in pipeline(text, voice=voice, speed=1):
                if audio is None:
                    continue
                encoder.write(audio)
                wrote_audio = True

        if not wrote_audio:
            # Without this the handler would fall through and answer
            # 200 with a JSON "null" body.
            return JSONResponse({"error": "No audio generated"}, status_code=500)

        out.seek(0)
        return StreamingResponse(out, media_type="audio/mpeg")

    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)

@app.post("/v1/audio/speech/test")
async def tts_test(request: Request):
    """Experimental endpoint: synthesize ``input`` with a blended voice.

    The voice is a linear interpolation between the English voice
    ("af_heart") and the Hindi voice ("hf_alpha"), rendered through the
    English pipeline.

    JSON body:
        input: text to synthesize (required, non-blank string).
        ratio: blend weight, default 0.5. 0 gives the pure English voice,
               1 the pure Hindi voice; values outside [0, 1] extrapolate.

    Responses:
        200: ``audio/mpeg`` body containing the audio of all chunks.
        400: ``{"error": ...}`` for blank input or a non-numeric ratio.
        500: ``{"error": ...}`` when synthesis fails or yields no audio.
    """
    try:
        data = await request.json()
        text = data.get("input", "")
        if not isinstance(text, str) or not text.strip():
            return JSONResponse({"error": "No input text"}, status_code=400)

        try:
            ratio = float(data.get("ratio", 0.5))  # blend ratio from request
        except (TypeError, ValueError):
            return JSONResponse({"error": "ratio must be a number"}, status_code=400)
        if not math.isfinite(ratio):
            return JSONResponse({"error": "ratio must be a number"}, status_code=400)

        # load_voice() fetches and caches the voice on first use. Indexing
        # pipeline.voices directly raises KeyError until some earlier
        # request has already loaded that voice.
        hindi_voice = pipeline_hi.load_voice("hf_alpha")
        english_voice = pipeline_en.load_voice("af_heart")
        blended_voice = torch.lerp(english_voice, hindi_voice, ratio)

        out = io.BytesIO()
        wrote_audio = False
        # Append every chunk the pipeline yields to one encoder so long
        # inputs are not cut off after the first chunk.
        with sf.SoundFile(out, mode='w', samplerate=24000, channels=1,
                          format='mp3') as encoder:
            for _, _, audio in pipeline_en(text, voice=blended_voice, speed=1):
                if audio is None:
                    continue
                encoder.write(audio)
                wrote_audio = True

        if not wrote_audio:
            return JSONResponse({"error": "No audio generated"}, status_code=500)

        out.seek(0)
        return StreamingResponse(out, media_type="audio/mpeg")
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)

# When this file is executed directly, serve the app with uvicorn,
# listening on all interfaces (0.0.0.0) on port 8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)