File size: 1,837 Bytes
86716a1 ef1f52d 86716a1 ef1f52d 86716a1 ef1f52d 86716a1 ef1f52d 86716a1 ef1f52d 86716a1 ef1f52d 86716a1 ef1f52d 86716a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import os

# Redirect every Hugging Face cache location to a directory under /app.
# This has to run before any Hugging Face-backed import further down the file,
# so keep it above those imports.
os.environ.update(
    {
        'HF_HOME': '/app/.cache/huggingface',
        'HUGGINGFACE_HUB_CACHE': '/app/.cache/huggingface/hub',
        'TRANSFORMERS_CACHE': '/app/.cache/huggingface',
    }
)
import io

import numpy as np
import soundfile as sf
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.staticfiles import StaticFiles
from kokoro import KPipeline
# ASGI application object; the route decorators below register on it.
app = FastAPI()

# A single Kokoro TTS pipeline, created once at import time and reused by
# every request. lang_code 'a' selects American English.
pipeline = KPipeline(lang_code='a')

# Serve the contents of the local static/ directory under the /static URL prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")
@app.get("/tts-stream")
def tts_stream(text: str):
    """Synthesize *text* with the af_sky voice and return it as one WAV payload.

    Args:
        text: Text to speak, read from the ``text`` query parameter.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` whose body is a
        single WAV file holding all generated audio, concatenated in order.

    Raises:
        HTTPException: 400 when the pipeline produces no audio segments for
            *text* (for example, when *text* is empty).
    """
    # The split pattern r'$^' is intended never to match, so the pipeline is
    # not asked to break the text on any delimiter.
    segments = list(
        pipeline(
            text,
            voice='af_sky',
            speed=1.0,
            split_pattern=r'$^',
        )
    )
    # np.concatenate raises ValueError on an empty list; turn that case into a
    # client error instead of an unhandled 500.
    if not segments:
        raise HTTPException(
            status_code=400,
            detail="No audio could be generated for the given text.",
        )

    # Each item is a 3-tuple whose last element is the audio for that segment.
    audio_full = np.concatenate([audio for (_, _, audio) in segments])

    # Encode to WAV in memory at a 24000 Hz sample rate.
    buf = io.BytesIO()
    sf.write(buf, audio_full, 24000, format='WAV')

    # Iterating a BytesIO yields "lines", i.e. it cuts the binary payload at
    # every 0x0A byte into many tiny chunks. Hand the response the complete
    # payload as a single chunk instead.
    return StreamingResponse(iter([buf.getvalue()]), media_type='audio/wav')
@app.get("/tts-file")
def tts_file(text: str):
    """Synthesize *text* with the af_sky voice, save it under static/, return its URL.

    Args:
        text: Text to speak, read from the ``text`` query parameter.

    Returns:
        ``{"url": "/static/output_full.wav"}`` — the URL path of the WAV file
        that was just written to the static/ directory.

    Raises:
        HTTPException: 400 when the pipeline produces no audio segments for
            *text* (for example, when *text* is empty).
    """
    # Single source of truth for both the on-disk path and the returned URL.
    filename = 'output_full.wav'

    # The split pattern r'$^' is intended never to match, so the pipeline is
    # not asked to break the text on any delimiter.
    segments = list(
        pipeline(
            text,
            voice='af_sky',
            speed=1.0,
            split_pattern=r'$^',
        )
    )
    # np.concatenate raises ValueError on an empty list; turn that case into a
    # client error instead of an unhandled 500.
    if not segments:
        raise HTTPException(
            status_code=400,
            detail="No audio could be generated for the given text.",
        )

    # Each item is a 3-tuple whose last element is the audio for that segment.
    audio_full = np.concatenate([audio for (_, _, audio) in segments])

    # NOTE(review): the output filename is fixed, so every request overwrites
    # the previous file and concurrent requests can clobber each other. Kept
    # as-is because clients may rely on this URL; consider per-request names.
    sf.write(f'static/{filename}', audio_full, 24000)
    return {"url": f"/static/{filename}"}