# Redirect the Hugging Face caches to a writable tmp location. This MUST run
# before any HF-dependent library (kokoro/transformers) is imported, because
# those libraries read the cache env vars at import time.
import os

_HF_CACHE_DIR = "/tmp/hf_cache"
os.environ["HF_HOME"] = _HF_CACHE_DIR
os.environ["TRANSFORMERS_CACHE"] = _HF_CACHE_DIR
|
|
import io

import soundfile as sf
import torch
from fastapi import FastAPI, Query
from fastapi.responses import JSONResponse, StreamingResponse
from kokoro import KPipeline
|
|
app = FastAPI(title="Kokoro TTS API")


# Single shared TTS pipeline, constructed once at import time so model weights
# are loaded before the first request rather than on demand.
# NOTE(review): lang_code='a' presumably selects American English in Kokoro —
# confirm against the kokoro package documentation.
pipeline = KPipeline(lang_code='a')
|
|
@app.post("/tts")
def tts_endpoint(text: str = Query(..., min_length=1)):
    """Convert *text* to speech and stream the result back as a WAV file.

    The text is split on blank-line boundaries (``\\n+``) by the Kokoro
    pipeline; each segment's audio is concatenated into one waveform.

    Returns:
        StreamingResponse with ``audio/wav`` (24 kHz mono) on success,
        or a 500 JSONResponse with an ``error`` message on failure.
    """
    # Plain `def` (not `async def`): Kokoro inference is blocking CPU/GPU
    # work, so FastAPI runs this handler in its threadpool instead of
    # stalling the event loop.
    try:
        generator = pipeline(text, voice='af_heart', speed=1.0, split_pattern=r'\n+')
        chunks = [audio for _, _, _, audio in generator]
        if not chunks:
            # torch.cat raises on an empty list — e.g. whitespace-only input.
            return JSONResponse(
                {"error": "TTS generation produced no audio"}, status_code=500
            )
        audio_tensor = torch.cat(chunks, dim=0)
    except Exception as e:
        # Report failure with a proper 500 status; the original returned a
        # bare dict, which FastAPI served as 200 OK.
        return JSONResponse(
            {"error": f"TTS generation failed: {e}"}, status_code=500
        )

    # Serialize the waveform to an in-memory WAV container at Kokoro's
    # native 24 kHz sample rate, then rewind for streaming.
    buffer = io.BytesIO()
    sf.write(buffer, audio_tensor.numpy(), 24000, format="WAV")
    buffer.seek(0)

    return StreamingResponse(buffer, media_type="audio/wav")
|
|