"""FastAPI service exposing Kokoro text-to-speech over HTTP.

Endpoints:
    GET /tts-stream?text=...  -> WAV audio returned in the response body.
    GET /tts-file?text=...    -> WAV written under ``static/``; returns its URL.
"""
import contextlib
import io
import os
import uuid

# Ensure HF cache dirs are set before any HF imports. These assignments
# intentionally precede the third-party imports below.
os.environ['HF_HOME'] = '/app/.cache/huggingface'
os.environ['HUGGINGFACE_HUB_CACHE'] = '/app/.cache/huggingface/hub'
os.environ['TRANSFORMERS_CACHE'] = '/app/.cache/huggingface'

import numpy as np
import soundfile as sf
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.staticfiles import StaticFiles
from kokoro import KPipeline

# Directory (relative to the working directory) served at /static.
STATIC_DIR = "static"
# File name that /tts-file writes and advertises in its response.
OUTPUT_FILENAME = "output_full.wav"
# Voice used for every request.
VOICE = 'af_sky'
# Sample rate (Hz) passed to soundfile when encoding the WAV output.
SAMPLE_RATE = 24000
# Pattern intended to never match, so the text is not split by pattern.
NO_SPLIT_PATTERN = r'$^'

app = FastAPI()

# Initialize Kokoro TTS pipeline for American English
pipeline = KPipeline(lang_code='a')

# StaticFiles refuses to mount a directory that does not exist, so create it
# up front instead of failing at import time on a fresh checkout/container.
os.makedirs(STATIC_DIR, exist_ok=True)

# Mount the static/ directory at /static
app.mount(
    "/static",
    StaticFiles(directory=STATIC_DIR),
    name="static",
)


def _synthesize(text: str) -> np.ndarray:
    """Run the TTS pipeline on *text* and return one continuous audio array.

    Args:
        text: The text to speak.

    Returns:
        All audio segments produced by the pipeline, concatenated in order.

    Raises:
        HTTPException: 400 if *text* is blank or the pipeline produced no
            audio for it (``np.concatenate`` would otherwise fail on an
            empty list and surface as an opaque 500).
    """
    if not text.strip():
        raise HTTPException(
            status_code=400,
            detail="Query parameter 'text' must not be empty.",
        )

    segments = pipeline(
        text,
        voice=VOICE,
        speed=1.0,
        split_pattern=NO_SPLIT_PATTERN,
    )
    # Each item unpacks to a 3-tuple whose last element is the audio.
    # Defensive: skip items without audio -- TODO confirm whether the
    # pipeline can actually yield None here.
    chunks = [audio for (_, _, audio) in segments if audio is not None]
    if not chunks:
        raise HTTPException(
            status_code=400,
            detail="No audio could be generated for the given text.",
        )
    return np.concatenate(chunks)


@app.get("/tts-stream")
def tts_stream(text: str):
    """
    Generate and stream a single continuous WAV audio using af_sky voice.

    Args:
        text: The text to speak (query parameter).

    Returns:
        A StreamingResponse with media type ``audio/wav``.

    Raises:
        HTTPException: 400 if no audio can be produced for *text*.
    """
    audio_full = _synthesize(text)

    # Write to in-memory buffer as WAV, then rewind so the response reads
    # it from the start.
    buf = io.BytesIO()
    sf.write(buf, audio_full, SAMPLE_RATE, format='WAV')
    buf.seek(0)
    return StreamingResponse(buf, media_type='audio/wav')


@app.get("/tts-file")
def tts_file(text: str):
    """
    Generate a full WAV file, save to static/, and return its URL.

    The output path is shared by all requests, so the most recently
    completed request wins. The file is written to a unique temporary name
    and then moved into place with ``os.replace`` so that concurrent
    requests or downloads do not see a partially written file.

    Args:
        text: The text to speak (query parameter).

    Returns:
        A dict ``{"url": "/static/output_full.wav"}``.

    Raises:
        HTTPException: 400 if no audio can be produced for *text*.
    """
    audio_full = _synthesize(text)

    output_path = os.path.join(STATIC_DIR, OUTPUT_FILENAME)
    tmp_path = f"{output_path}.{uuid.uuid4().hex}.tmp"
    try:
        # The temp name has no .wav extension, so the format is explicit.
        sf.write(tmp_path, audio_full, SAMPLE_RATE, format='WAV')
        os.replace(tmp_path, output_path)
    except BaseException:
        # Do not leave stray temp files in the publicly served directory.
        with contextlib.suppress(FileNotFoundError):
            os.remove(tmp_path)
        raise

    return {"url": f"/static/{OUTPUT_FILENAME}"}