# sky / main.py
# triflix's picture
# Update main.py
# 86716a1 verified
import os
# The Hugging Face cache locations must be in the environment before any
# HF-backed package is imported, so this runs ahead of the remaining imports.
os.environ.update({
    'HF_HOME': '/app/.cache/huggingface',
    'HUGGINGFACE_HUB_CACHE': '/app/.cache/huggingface/hub',
    'TRANSFORMERS_CACHE': '/app/.cache/huggingface',
})
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from fastapi.staticfiles import StaticFiles
from kokoro import KPipeline
import io
import numpy as np
import soundfile as sf
app = FastAPI()

# Initialize Kokoro TTS pipeline for American English
pipeline = KPipeline(lang_code='a')

# StaticFiles checks that its directory exists when it is constructed and
# fails otherwise; /tts-file also writes its output into this folder. Create
# it up front so a fresh checkout/container can start.
os.makedirs("static", exist_ok=True)

# Mount the static/ directory at /static
app.mount("/static", StaticFiles(directory="static"), name="static")
@app.get("/tts-stream")
def tts_stream(text: str):
    """
    Synthesize *text* with the af_sky voice and return it as a single WAV.

    The text is passed to the Kokoro pipeline, every audio chunk the pipeline
    yields is joined into one array, and that array is encoded as WAV at a
    24000 Hz sample rate into an in-memory buffer that backs the response.

    Args:
        text: Text to speak (query parameter).

    Returns:
        A StreamingResponse with media type ``audio/wav``.

    Raises:
        HTTPException: 400 when the pipeline yields no audio for *text*.
    """
    # Local import: keeps this handler self-contained without touching the
    # file-level import block.
    from fastapi import HTTPException

    results = pipeline(
        text,
        voice='af_sky',
        speed=1.0,
        # `$^` cannot match inside non-empty text, so no pattern-based split
        # happens. The pipeline may still yield more than one chunk, which is
        # why the pieces are concatenated below.
        split_pattern=r'$^',
    )

    # Skip results that carry no audio.
    # NOTE(review): assumes a result's audio can be None - confirm against kokoro.
    chunks = [audio for (_, _, audio) in results if audio is not None]
    if not chunks:
        # np.concatenate([]) would raise ValueError and surface as an opaque
        # HTTP 500; report the unusable input explicitly instead.
        raise HTTPException(
            status_code=400,
            detail="No audio could be generated for the given text.",
        )

    # Concatenate into one array
    audio_full = np.concatenate(chunks)

    # Write to in-memory buffer as WAV, then rewind so the response reads
    # from the start of the data.
    buf = io.BytesIO()
    sf.write(buf, audio_full, 24000, format='WAV')
    buf.seek(0)
    return StreamingResponse(buf, media_type='audio/wav')
@app.get("/tts-file")
def tts_file(text: str):
    """
    Synthesize *text* with the af_sky voice, save it under static/, and
    return its URL.

    The audio chunks yielded by the Kokoro pipeline are joined into one array
    and written as WAV at a 24000 Hz sample rate to
    ``static/output_full.wav``. The file is written under a temporary name
    and then moved into place, so a reader of the final path never sees a
    partially written file.

    Args:
        text: Text to speak (query parameter).

    Returns:
        A dict ``{"url": "/static/output_full.wav"}``.

    Raises:
        HTTPException: 400 when the pipeline yields no audio for *text*.
    """
    # Local imports: keep this handler self-contained without touching the
    # file-level import block.
    import uuid

    from fastapi import HTTPException

    results = pipeline(
        text,
        voice='af_sky',
        speed=1.0,
        # `$^` cannot match inside non-empty text, so no pattern-based split
        # happens. The pipeline may still yield more than one chunk.
        split_pattern=r'$^',
    )

    # Skip results that carry no audio.
    # NOTE(review): assumes a result's audio can be None - confirm against kokoro.
    chunks = [audio for (_, _, audio) in results if audio is not None]
    if not chunks:
        # np.concatenate([]) would raise ValueError and surface as an opaque
        # HTTP 500; report the unusable input explicitly instead.
        raise HTTPException(
            status_code=400,
            detail="No audio could be generated for the given text.",
        )
    audio_full = np.concatenate(chunks)

    output_path = 'static/output_full.wav'
    os.makedirs('static', exist_ok=True)

    # Write to a unique temporary name first, then swap it in. The explicit
    # format is needed because the temporary name does not end in ".wav".
    tmp_path = f"{output_path}.{uuid.uuid4().hex}.tmp"
    try:
        sf.write(tmp_path, audio_full, 24000, format='WAV')
        os.replace(tmp_path, output_path)
    finally:
        # After a successful replace the temporary name is gone; this only
        # cleans up when writing or replacing failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    return {"url": "/static/output_full.wav"}