File size: 1,837 Bytes
86716a1
 
 
 
 
 
ef1f52d
 
 
 
 
 
 
 
 
86716a1
ef1f52d
 
86716a1
 
 
 
ef1f52d
 
 
86716a1
 
 
 
 
 
 
 
 
 
 
 
ef1f52d
86716a1
 
ef1f52d
86716a1
ef1f52d
86716a1
ef1f52d
 
 
86716a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
# Hugging Face cache locations, exported before the imports further down so
# that they are already in the environment when those modules load.
os.environ.update({
    'HF_HOME': '/app/.cache/huggingface',
    'HUGGINGFACE_HUB_CACHE': '/app/.cache/huggingface/hub',
    'TRANSFORMERS_CACHE': '/app/.cache/huggingface',
})

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from fastapi.staticfiles import StaticFiles
from kokoro import KPipeline
import io
import numpy as np
import soundfile as sf

# ASGI application object; the route decorators below register on it.
app = FastAPI()

# One Kokoro pipeline shared by every request handler in this module.
# lang_code='a' selects American English.
pipeline = KPipeline(lang_code='a')

# Serve the contents of the local static/ directory under the /static prefix.
app.mount("/static", StaticFiles(directory="static"), name="static")

@app.get("/tts-stream")
def tts_stream(text: str):
    """
    Synthesize ``text`` with the af_sky voice and return it as one WAV response.

    Every audio chunk yielded by the pipeline is joined into a single array,
    encoded as WAV (sample rate 24000) into an in-memory buffer, and sent
    back with media type ``audio/wav``.

    Args:
        text: Text to synthesize.

    Returns:
        A ``StreamingResponse`` carrying the encoded WAV bytes.

    Raises:
        HTTPException: status 400 when the pipeline yields no audio for
            ``text``.
    """
    # Local import: keeps this handler self-contained with respect to the
    # module's import list.
    from fastapi import HTTPException

    # The '$^' pattern is meant to never match, so the text is not split on
    # a separator. The pipeline may still yield more than one item, hence
    # the concatenation below.
    chunks = [
        audio
        for (_, _, audio) in pipeline(
            text,
            voice='af_sky',
            speed=1.0,
            split_pattern=r'$^',
        )
    ]
    if not chunks:
        # np.concatenate raises ValueError on an empty sequence, which would
        # otherwise surface as an opaque 500. Report a client error instead.
        raise HTTPException(
            status_code=400,
            detail="No audio could be generated for the given text.",
        )
    audio_full = np.concatenate(chunks)

    # Encode to WAV entirely in memory.
    buf = io.BytesIO()
    sf.write(buf, audio_full, 24000, format='WAV')
    buf.seek(0)

    # Iterating a BytesIO directly yields "lines" split on b'\n', which for
    # binary audio means arbitrary, often tiny, chunks. Read fixed-size
    # blocks until the buffer is exhausted instead.
    body = iter(lambda: buf.read(64 * 1024), b"")
    return StreamingResponse(body, media_type='audio/wav')

@app.get("/tts-file")
def tts_file(text: str):
    """
    Synthesize ``text`` with the af_sky voice, save it under static/, and
    return the URL of the saved file.

    Every audio chunk yielded by the pipeline is joined into a single array
    and written to ``static/output_full.wav`` (sample rate 24000).

    Args:
        text: Text to synthesize.

    Returns:
        A dict ``{"url": "/static/output_full.wav"}`` pointing at the file.

    Raises:
        HTTPException: status 400 when the pipeline yields no audio for
            ``text``.
    """
    # Local import: keeps this handler self-contained with respect to the
    # module's import list.
    from fastapi import HTTPException

    # Single source of truth for both the on-disk path and the returned URL.
    filename = 'output_full.wav'

    # The '$^' pattern is meant to never match, so the text is not split on
    # a separator. The pipeline may still yield more than one item, hence
    # the concatenation below.
    chunks = [
        audio
        for (_, _, audio) in pipeline(
            text,
            voice='af_sky',
            speed=1.0,
            split_pattern=r'$^',
        )
    ]
    if not chunks:
        # np.concatenate raises ValueError on an empty sequence, which would
        # otherwise surface as an opaque 500. Report a client error instead.
        raise HTTPException(
            status_code=400,
            detail="No audio could be generated for the given text.",
        )
    audio_full = np.concatenate(chunks)

    # NOTE(review): every request writes the same file, so overlapping
    # requests overwrite each other's output (last writer wins). Confirm
    # whether per-request filenames are wanted before changing the URL.
    sf.write(f'static/{filename}', audio_full, 24000)
    return {"url": f"/static/{filename}"}