Spaces:

abc1181
/

livekit-tts

Sleeping

App Files Files Community

livekit-tts / server.py

abc1181

Update server.py

778ccba verified 3 months ago

raw

history blame contribute delete

2.92 kB

	from fastapi import FastAPI, Request
	from fastapi.responses import StreamingResponse, JSONResponse
	from kokoro import KPipeline
	import soundfile as sf
	import io
	import uvicorn
	import langdetect

	app = FastAPI()

	# langdetect samples randomly, so two detect() calls on the same short or
	# ambiguous text can return different languages. The voice and the pipeline
	# are chosen by separate detect() calls; a fixed seed keeps them in agreement.
	langdetect.DetectorFactory.seed = 0

	# Pre-load both pipelines at startup so there's no delay mid-conversation.
	pipeline_en = KPipeline(lang_code='a')  # English / Hinglish
	pipeline_hi = KPipeline(lang_code='h')  # Hindi

	def detect_pipeline(text: str) -> KPipeline:
	    """Choose the pre-loaded Kokoro pipeline for the language of ``text``.

	    Text that langdetect labels ``'hi'`` gets ``pipeline_hi``. Every other
	    label gets ``pipeline_en``, which is also the fallback when detection
	    raises.
	    """
	    try:
	        is_hindi = langdetect.detect(text) == 'hi'
	    except Exception:
	        # Detection failed: fall back to the English pipeline.
	        return pipeline_en
	    return pipeline_hi if is_hindi else pipeline_en

	def detect_voice(text: str) -> str:
	    """Choose the Kokoro voice name for the language of ``text``.

	    Returns ``"hf_alpha"`` when langdetect labels the text ``'hi'``, and
	    ``"af_heart"`` for any other label or when detection raises.
	    """
	    english_voice = "af_heart"
	    try:
	        detected = langdetect.detect(text)
	    except Exception:
	        # Detection failed: fall back to the English voice.
	        return english_voice
	    if detected == 'hi':
	        return "hf_alpha"
	    return english_voice

	# Health checks
	@app.get("/")
	@app.get("/v1")
	async def root():
	    """Return a static status payload; served at both ``/`` and ``/v1``."""
	    supported_languages = ["en", "hi"]
	    return {
	        "status": "ok",
	        "service": "kokoro-tts",
	        "languages": supported_languages,
	    }

	# Main TTS endpoint
	@app.post("/v1/audio/speech")
	async def tts(request: Request):
	    """Synthesize speech for the ``input`` field of a JSON request body.

	    The voice and pipeline are both chosen by language detection on the
	    text. Any voice named in the request body is ignored.

	    Returns:
	        An ``audio/mpeg`` response containing the whole utterance, or a JSON
	        ``{"error": ...}`` response: 400 for a malformed body, empty input,
	        or input that produces no audio; 500 for any synthesis failure.
	    """
	    try:
	        data = await request.json()
	    except ValueError:
	        # json.JSONDecodeError is a ValueError: the client sent a bad body.
	        return JSONResponse({"error": "Invalid JSON body"}, status_code=400)
	    except Exception as e:
	        return JSONResponse({"error": str(e)}, status_code=500)

	    if not isinstance(data, dict):
	        return JSONResponse({"error": "JSON body must be an object"}, status_code=400)

	    text = data.get("input", "")
	    if not text:
	        return JSONResponse({"error": "No input text"}, status_code=400)

	    try:
	        # Function-scope import: numpy is not in this file's import block.
	        import numpy as np

	        voice = detect_voice(text)
	        pipeline = detect_pipeline(text)

	        # The pipeline yields one audio array per text segment. Collect all
	        # of them; returning on the first one truncates longer inputs.
	        segments = [
	            np.asarray(audio)
	            for _, _, audio in pipeline(text, voice=voice, speed=1)
	            if audio is not None
	        ]
	        if not segments:
	            return JSONResponse(
	                {"error": "No audio generated for input text"}, status_code=400
	            )

	        out = io.BytesIO()
	        sf.write(out, np.concatenate(segments), 24000, format='mp3')
	        out.seek(0)
	        return StreamingResponse(out, media_type="audio/mpeg")
	    except Exception as e:
	        return JSONResponse({"error": str(e)}, status_code=500)

	@app.post("/v1/audio/speech/test")
	async def tts_test(request: Request):
	    """Synthesize ``input`` with a blend of the English and Hindi voices.

	    The optional ``ratio`` field (default 0.5) is the ``torch.lerp`` weight:
	    0 gives the ``af_heart`` voice, 1 gives ``hf_alpha``, and values in
	    between interpolate linearly. The blended voice is always run through
	    the English pipeline.

	    Returns:
	        An ``audio/mpeg`` response containing the whole utterance, or a JSON
	        ``{"error": ...}`` response: 400 for a malformed body, empty input,
	        a non-numeric ratio, or input that produces no audio; 500 for any
	        synthesis failure.
	    """
	    try:
	        data = await request.json()
	    except ValueError:
	        # json.JSONDecodeError is a ValueError: the client sent a bad body.
	        return JSONResponse({"error": "Invalid JSON body"}, status_code=400)
	    except Exception as e:
	        return JSONResponse({"error": str(e)}, status_code=500)

	    if not isinstance(data, dict):
	        return JSONResponse({"error": "JSON body must be an object"}, status_code=400)

	    text = data.get("input", "")
	    if not text:
	        return JSONResponse({"error": "No input text"}, status_code=400)

	    try:
	        ratio = float(data.get("ratio", 0.5))  # blend ratio from request
	    except (TypeError, ValueError):
	        return JSONResponse({"error": "ratio must be a number"}, status_code=400)

	    try:
	        # Function-scope imports: neither module is in this file's import
	        # block, and torch was previously used here without being imported.
	        import numpy as np
	        import torch

	        # load_voice returns the voice tensor and loads it if it is not
	        # cached yet; indexing pipeline.voices directly raises KeyError
	        # until some earlier request has used that voice.
	        hindi_voice = pipeline_hi.load_voice("hf_alpha")
	        english_voice = pipeline_en.load_voice("af_heart")
	        blended_voice = torch.lerp(english_voice, hindi_voice, ratio)

	        # Collect every segment the pipeline yields; returning on the first
	        # one truncates longer inputs.
	        segments = [
	            np.asarray(audio)
	            for _, _, audio in pipeline_en(text, voice=blended_voice, speed=1)
	            if audio is not None
	        ]
	        if not segments:
	            return JSONResponse(
	                {"error": "No audio generated for input text"}, status_code=400
	            )

	        out = io.BytesIO()
	        sf.write(out, np.concatenate(segments), 24000, format='mp3')
	        out.seek(0)
	        return StreamingResponse(out, media_type="audio/mpeg")
	    except Exception as e:
	        return JSONResponse({"error": str(e)}, status_code=500)

	if __name__ == "__main__":
	    # Run directly as a script: serve the app on all interfaces, port 8000.
	    uvicorn.run(
	        app,
	        port=8000,
	        host="0.0.0.0",
	    )