Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, Request | |
| from fastapi.responses import StreamingResponse, JSONResponse | |
| from kokoro import KPipeline | |
| import soundfile as sf | |
| import io | |
| import uvicorn | |
| import langdetect | |
app = FastAPI()

# langdetect is randomized by default and may report different languages for
# the same short or ambiguous text. detect_pipeline() and detect_voice() each
# run their own detection, so pin the seed to keep their answers in agreement
# (otherwise a Hindi pipeline can end up paired with the English voice).
langdetect.DetectorFactory.seed = 0

# Pre-load both pipelines at startup so there's no delay mid-conversation.
pipeline_en = KPipeline(lang_code='a')  # English / Hinglish
pipeline_hi = KPipeline(lang_code='h')  # Hindi
def detect_pipeline(text: str) -> KPipeline:
    """Choose the pre-loaded Kokoro pipeline for the language of *text*.

    Returns ``pipeline_hi`` when langdetect reports ``'hi'``; every other
    detected language gets ``pipeline_en`` (used for English and Hinglish).
    If detection raises for any reason, ``pipeline_en`` is returned as the
    default.
    """
    try:
        is_hindi = langdetect.detect(text) == 'hi'
    except Exception:
        # Detection failed: default to English.
        return pipeline_en
    return pipeline_hi if is_hindi else pipeline_en
def detect_voice(text: str) -> str:
    """Choose the voice name for the language of *text*.

    Returns ``"hf_alpha"`` when langdetect reports ``'hi'`` and
    ``"af_heart"`` for every other detected language. ``"af_heart"`` is also
    the fallback when detection raises.
    """
    fallback_voice = "af_heart"  # English voice; also the failure default
    try:
        detected = langdetect.detect(text)
    except Exception:
        return fallback_voice
    if detected == 'hi':
        return "hf_alpha"  # Hindi voice
    return fallback_voice
# Health checks
# NOTE(review): no route decorator is visible on this handler in this view of
# the file -- confirm it is registered on `app` somewhere.
async def root():
    """Return a static status payload naming the service and its languages."""
    supported_languages = ["en", "hi"]
    return dict(
        status="ok",
        service="kokoro-tts",
        languages=supported_languages,
    )
# Main TTS endpoint
# NOTE(review): no route decorator is visible on this handler in this view of
# the file -- confirm it is registered on `app` somewhere.
async def tts(request: Request):
    """Synthesize the request's ``"input"`` text and return it as MP3 audio.

    The JSON body must be an object with a non-empty ``"input"`` field.
    Voice and pipeline are chosen by ``detect_voice`` / ``detect_pipeline``.

    Returns:
        StreamingResponse (``audio/mpeg``) holding the complete utterance, or
        a JSONResponse ``{"error": ...}`` with status 400 when no input text
        is supplied and status 500 when synthesis fails or yields no audio.
    """
    try:
        # numpy is imported here because the module-level imports of this
        # file do not include it; soundfile already depends on it.
        import numpy as np

        data = await request.json()
        # A non-object body (list, string, ...) cannot carry an "input" field.
        text = data.get("input", "") if isinstance(data, dict) else ""
        if not text:
            return JSONResponse({"error": "No input text"}, status_code=400)

        # Voice and pipeline both follow the detected language. The request
        # body is NOT consulted for a voice override.
        voice = detect_voice(text)
        pipeline = detect_pipeline(text)

        # The pipeline yields one audio chunk per text segment. Collect all
        # of them: encoding only a single chunk truncates longer or
        # multi-line input.
        chunks = [
            np.asarray(audio)
            for _, _, audio in pipeline(text, voice=voice, speed=1)
            if audio is not None
        ]
        if not chunks:
            return JSONResponse({"error": "No audio generated"}, status_code=500)

        out = io.BytesIO()
        # 24000 is the sample rate the original code passes for Kokoro audio.
        sf.write(out, np.concatenate(chunks), 24000, format='mp3')
        out.seek(0)
        return StreamingResponse(out, media_type="audio/mpeg")
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
# NOTE(review): no route decorator is visible on this handler in this view of
# the file -- confirm it is registered on `app` somewhere.
async def tts_test(request: Request):
    """Synthesize ``"input"`` with a blend of the English and Hindi voices.

    The JSON body must be an object with a non-empty ``"input"`` field and
    may carry ``"ratio"`` (default 0.5), the interpolation weight passed to
    ``torch.lerp``: 0 is the English voice, 1 is the Hindi voice. The text
    is always run through ``pipeline_en``.

    Returns:
        StreamingResponse (``audio/mpeg``) holding the complete utterance, or
        a JSONResponse ``{"error": ...}`` with status 400 for missing input
        or a non-numeric ratio and status 500 when synthesis fails or yields
        no audio.
    """
    try:
        # Neither torch nor numpy is among the imports visible at the top of
        # this file; without this import torch.lerp raised NameError on
        # every call.
        import numpy as np
        import torch

        data = await request.json()
        # A non-object body (list, string, ...) cannot carry an "input" field.
        text = data.get("input", "") if isinstance(data, dict) else ""
        if not text:
            return JSONResponse({"error": "No input text"}, status_code=400)
        try:
            ratio = float(data.get("ratio", 0.5))  # blend ratio from request
        except (TypeError, ValueError):
            return JSONResponse({"error": "Invalid ratio"}, status_code=400)

        # load_voice() loads the voice on first use. Indexing `.voices`
        # directly only works once that voice has already been loaded, so it
        # fails with KeyError on a freshly started server.
        hindi_voice = pipeline_hi.load_voice("hf_alpha")
        english_voice = pipeline_en.load_voice("af_heart")
        blended_voice = torch.lerp(english_voice, hindi_voice, ratio)

        # Collect every chunk the pipeline yields; encoding only a single
        # chunk truncates longer or multi-line input.
        chunks = [
            np.asarray(audio)
            for _, _, audio in pipeline_en(text, voice=blended_voice, speed=1)
            if audio is not None
        ]
        if not chunks:
            return JSONResponse({"error": "No audio generated"}, status_code=500)

        out = io.BytesIO()
        # 24000 is the sample rate the original code passes for Kokoro audio.
        sf.write(out, np.concatenate(chunks), 24000, format='mp3')
        out.seek(0)
        return StreamingResponse(out, media_type="audio/mpeg")
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
if __name__ == "__main__":
    # Script entry point: serve `app` with uvicorn on all interfaces.
    listen_host, listen_port = "0.0.0.0", 8000
    uvicorn.run(app, host=listen_host, port=listen_port)