# File size: 3,999 Bytes
# 860a086
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import os
import uuid
import tempfile
from typing import Optional
import soundfile as sf

# Import your TTS dependencies
from kokoro import KPipeline

# Initialize the TTS pipeline once at import time; tts() below reuses this
# single module-level instance for every request.
# NOTE(review): lang_code 'a' presumably has to match the language of the
# voices being requested (default voice is 'af_bella') - confirm against the
# Kokoro documentation.
pipeline = KPipeline(lang_code='a')  # Make sure lang_code matches voice

# Initialize FastAPI app
app = FastAPI(title="Kokoro TTS API Service")

# Add CORS middleware to allow frontend requests
# NOTE(review): this combines wildcard origins with allow_credentials=True.
# FastAPI's CORS documentation says origins must be listed explicitly when
# credentials are allowed - confirm whether credentialed requests are needed
# here before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with your domains
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files directory
# The directory is given as a relative path, so it depends on the working
# directory the server process is started from.
app.mount("/static", StaticFiles(directory="static"), name="static")

# Directory used for the generated audio files: the system temp directory
# reported by tempfile. makedirs with exist_ok=True does nothing when the
# directory already exists.
TEMP_DIR = tempfile.gettempdir()
os.makedirs(TEMP_DIR, exist_ok=True)

def tts(text, file_name, voice='af_bella', speed=0.9):
    """
    Generate speech from text using Kokoro TTS and save it as one .wav file.

    The pipeline may yield more than one audio segment for a single call.
    All segments are collected and written to ``file_name`` as one
    continuous recording, instead of each segment overwriting the previous
    one.

    Args:
        text (str): Text to convert to speech
        file_name (str): Path to save the output .wav file
        voice (str): Voice to use for TTS
        speed (float): Speed of speech

    Returns:
        str: Path to the generated audio file

    Raises:
        Exception: If the pipeline fails, the file cannot be written, or the
            pipeline produced no audio at all. The original error is chained
            as ``__cause__``.
    """
    # Local import: numpy is not imported at module level in this file
    # (soundfile itself depends on it, so it is available).
    import numpy as np

    # Sample rate in Hz handed to soundfile (same value the file has always
    # been written with).
    sample_rate = 24000

    try:
        generator = pipeline(
            text, voice=voice,
            speed=speed, split_pattern=None
        )

        # Each yielded item is a (graphemes, phonemes, audio) triple; only the
        # audio is needed. np.asarray mirrors the conversion sf.write applies.
        segments = [np.asarray(audio) for _, _, audio in generator]
        if not segments:
            # Without this check no file would exist and the caller would be
            # handed a path that points at nothing.
            raise ValueError("no audio was generated for the given text")

        if len(segments) == 1:
            samples = segments[0]
        else:
            samples = np.concatenate(segments)

        sf.write(file_name, samples, sample_rate)  # save audio file
        return file_name
    except Exception as e:
        raise Exception(f"TTS generation failed: {str(e)}") from e

class TTSRequest(BaseModel):
    # Request body for the POST /tts/ endpoint. The voice and speed defaults
    # are the same values tts() uses for its own defaults.
    text: str  # Text to convert to speech (required)
    voice: str = "af_bella"  # Voice to use for TTS
    speed: float = 0.9  # Speed of speech

@app.post("/tts/")
async def text_to_speech(request: TTSRequest):
    """
    Convert text to speech and return a .wav file.

    The audio is rendered into a uniquely named file in TEMP_DIR. That file
    is deleted once the response has been sent, or immediately if generation
    fails, so requests do not accumulate files on disk.

    Args:
        request (TTSRequest): Text, voice and speed for the synthesis

    Returns:
        FileResponse: The generated audio, served as "audio/wav"

    Raises:
        HTTPException: 500 if speech generation fails
    """
    # Local import: Starlette is the framework FastAPI is built on; this
    # name is not imported at module level in this file.
    from starlette.background import BackgroundTask

    def _discard(path):
        # Best-effort removal; the file may already be gone.
        try:
            os.remove(path)
        except OSError:
            pass

    # Generate a unique filename
    filename = f"{uuid.uuid4()}.wav"
    output_path = os.path.join(TEMP_DIR, filename)

    try:
        # Generate speech using your TTS function
        tts(request.text, output_path, request.voice, request.speed)
    except Exception as e:
        # Do not leave a partially written file behind.
        _discard(output_path)
        raise HTTPException(
            status_code=500, detail=f"TTS generation failed: {str(e)}"
        ) from e

    # Return the audio file; the background task runs after the response
    # body has been sent and removes the temporary file.
    return FileResponse(
        path=output_path,
        filename=filename,
        media_type="audio/wav",
        background=BackgroundTask(_discard, output_path),
    )

@app.get("/tts-get/")
async def text_to_speech_get(
    text: str = Query(..., description="Text to convert to speech"),
    voice: str = Query("af_bella", description="Voice to use for TTS"),
    speed: float = Query(0.9, description="Speed of speech (0.5-1.5)")
):
    """
    GET endpoint for text-to-speech conversion.

    The audio is rendered into a uniquely named file in TEMP_DIR. That file
    is deleted once the response has been sent, or immediately if generation
    fails, so requests do not accumulate files on disk.

    Args:
        text (str): Text to convert to speech (required query parameter)
        voice (str): Voice to use for TTS
        speed (float): Speed of speech. The 0.5-1.5 range in the parameter
            description is informational only; it is not enforced here.

    Returns:
        FileResponse: The generated audio, served as "audio/wav"

    Raises:
        HTTPException: 500 if speech generation fails
    """
    # Local import: Starlette is the framework FastAPI is built on; this
    # name is not imported at module level in this file.
    from starlette.background import BackgroundTask

    def _discard(path):
        # Best-effort removal; the file may already be gone.
        try:
            os.remove(path)
        except OSError:
            pass

    # Generate a unique filename
    filename = f"{uuid.uuid4()}.wav"
    output_path = os.path.join(TEMP_DIR, filename)

    try:
        # Generate speech using your TTS function
        tts(text, output_path, voice, speed)
    except Exception as e:
        # Do not leave a partially written file behind.
        _discard(output_path)
        raise HTTPException(
            status_code=500, detail=f"TTS generation failed: {str(e)}"
        ) from e

    # Return the audio file; the background task runs after the response
    # body has been sent and removes the temporary file.
    return FileResponse(
        path=output_path,
        filename=filename,
        media_type="audio/wav",
        background=BackgroundTask(_discard, output_path),
    )

@app.get("/voices/")
async def available_voices():
    """
    List the voices this service offers, together with the default voice
    """
    # Placeholder data - swap in the real set of voices exposed by the
    # kokoro library.
    default_voice = "af_bella"
    voice_names = [default_voice]  # Add other available voices here
    payload = {"voices": voice_names, "default": default_voice}
    return payload

@app.get("/")
async def root():
    """
    Respond with the frontend's index page
    """
    index_page = 'static/index.html'
    return FileResponse(index_page)