|
|
import os

# Point the Hugging Face cache locations at the app-local cache directory.
# NOTE(review): these assignments sit above the third-party imports,
# presumably so those libraries see the values when they are first
# imported -- keep this ordering unless confirmed otherwise.
os.environ['HF_HOME'] = '/app/.cache/huggingface'
os.environ['HUGGINGFACE_HUB_CACHE'] = '/app/.cache/huggingface/hub'
os.environ['TRANSFORMERS_CACHE'] = '/app/.cache/huggingface'
|
|
|
|
|
import io

import numpy as np
import soundfile as sf
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.staticfiles import StaticFiles
from kokoro import KPipeline
|
|
|
|
|
# ASGI application object; the route decorators below register on it.
app = FastAPI()

# Single text-to-speech pipeline, built once at import time and shared by
# every request handler.
# NOTE(review): 'a' is presumably kokoro's American-English language code --
# confirm against the kokoro documentation.
pipeline = KPipeline(lang_code='a')

# Serve the contents of the local "static" directory under /static.
app.mount("/static", StaticFiles(directory="static"), name="static")
|
|
|
|
|
@app.get("/tts-stream")
def tts_stream(text: str):
    """Synthesize ``text`` with the ``af_sky`` voice and return one WAV.

    The full utterance is rendered into an in-memory buffer and handed to
    ``StreamingResponse``; nothing is written to disk.

    Args:
        text: Text to speak, taken from the ``text`` query parameter.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` wrapping the
        in-memory WAV data, written with a sample rate of 24000.

    Raises:
        HTTPException: 400 when the pipeline yields no audio segments for
            ``text``.
    """
    # NOTE(review): r'$^' looks chosen so that ordinary text is never split
    # into several chunks -- confirm against kokoro's split_pattern handling.
    chunks = [
        audio
        for _, _, audio in pipeline(
            text,
            voice='af_sky',
            speed=1.0,
            split_pattern=r'$^',
        )
    ]
    if not chunks:
        # np.concatenate raises ValueError on an empty sequence, which would
        # otherwise reach the client as an opaque 500.
        raise HTTPException(
            status_code=400,
            detail="No audio could be generated for the given text.",
        )

    audio_full = np.concatenate(chunks)

    buf = io.BytesIO()
    sf.write(buf, audio_full, 24000, format='WAV')
    # Rewind so the response reads the buffer from its first byte.
    buf.seek(0)
    return StreamingResponse(buf, media_type='audio/wav')
|
|
|
|
|
@app.get("/tts-file")
def tts_file(text: str):
    """Synthesize ``text`` to ``static/output_full.wav`` and return its URL.

    Args:
        text: Text to speak, taken from the ``text`` query parameter.

    Returns:
        A dict ``{"url": "/static/output_full.wav"}`` pointing at the file
        written by this call.

    Raises:
        HTTPException: 400 when the pipeline yields no audio segments for
            ``text``.
    """
    # NOTE(review): r'$^' looks chosen so that ordinary text is never split
    # into several chunks -- confirm against kokoro's split_pattern handling.
    chunks = [
        audio
        for _, _, audio in pipeline(
            text,
            voice='af_sky',
            speed=1.0,
            split_pattern=r'$^',
        )
    ]
    if not chunks:
        # np.concatenate raises ValueError on an empty sequence, which would
        # otherwise reach the client as an opaque 500.
        raise HTTPException(
            status_code=400,
            detail="No audio could be generated for the given text.",
        )

    audio_full = np.concatenate(chunks)

    # NOTE(review): the filename is fixed, so each call overwrites the file
    # written by the previous one; overlapping requests may clobber each
    # other -- consider per-request filenames if that matters.
    output_path = 'static/output_full.wav'
    sf.write(output_path, audio_full, 24000)
    return {"url": "/static/output_full.wav"}