import os
import tempfile
import uuid
from typing import Optional

import numpy as np
import soundfile as sf
from fastapi import BackgroundTasks, FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from kokoro import KPipeline
from pydantic import BaseModel
|
|
|
|
|
# A single Kokoro pipeline is built at import time and shared by all requests.
pipeline = KPipeline(lang_code='a')

app = FastAPI(title="Kokoro TTS API Service")

# CORS policy: every origin, method and header is accepted, with credentials.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — confirm this is intended before exposing the service publicly.
_cors_policy = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_policy)

# Serve the contents of the "static" directory (a relative path, resolved
# against the process working directory) under the /static URL prefix.
_static_files = StaticFiles(directory="static")
app.mount("/static", _static_files, name="static")
|
|
|
|
|
# Directory that receives the generated .wav files: the platform's default
# temporary directory as reported by the standard library.
TEMP_DIR = tempfile.gettempdir()

# Harmless when the directory already exists (exist_ok=True).
os.makedirs(name=TEMP_DIR, exist_ok=True)
|
|
|
def tts(text, file_name, voice='af_bella', speed=0.9):
    """Generate speech from text using Kokoro TTS.

    The pipeline is consumed as a generator that may yield the audio in
    several consecutive segments. All segments are joined, in order, and
    written to ``file_name`` as a single file, so the whole input text ends
    up in the output rather than only its last segment.

    Args:
        text (str): Text to convert to speech.
        file_name (str): Path to save the output .wav file.
        voice (str): Voice to use for TTS.
        speed (float): Speed of speech.

    Returns:
        str: Path to the generated audio file (the ``file_name`` argument).

    Raises:
        RuntimeError: If synthesis or writing the file fails, or if the
            pipeline produced no audio at all. The underlying error is
            attached as ``__cause__``.
    """
    # Sample rate stored in the file header; presumably Kokoro's native
    # output rate — confirm against the Kokoro documentation.
    sample_rate = 24000

    try:
        generator = pipeline(
            text,
            voice=voice,
            speed=speed,
            split_pattern=None,
        )

        # Each yielded item unpacks to (graphemes, phonemes, audio); only the
        # audio is needed. Writing each segment straight to `file_name` would
        # overwrite the previous one, so collect them first.
        segments = [
            np.asarray(audio)
            for _, _, audio in generator
            if audio is not None
        ]
        if not segments:
            # Fail loudly instead of returning a path to a file that was
            # never created.
            raise ValueError("no audio was produced for the given text")

        sf.write(file_name, np.concatenate(segments), sample_rate)
        return file_name
    except Exception as e:
        # Same message prefix as before; `from e` keeps the original traceback.
        raise RuntimeError(f"TTS generation failed: {str(e)}") from e
|
|
|
# Request body accepted by the POST /tts/ endpoint. Only `text` is mandatory;
# the remaining fields fall back to the defaults below.
class TTSRequest(BaseModel):
    # Text to convert to speech.
    text: str
    # Voice passed through to the TTS pipeline.
    voice: str = "af_bella"
    # Speed of speech passed through to the TTS pipeline.
    speed: float = 0.9
|
|
|
@app.post("/tts/")
async def text_to_speech(request: TTSRequest):
    """
    Convert text to speech and return a .wav file

    The audio is rendered into a uniquely named file in the temporary
    directory and that file is deleted once the response has been sent.
    Any failure is reported as an HTTP 500 response.
    """

    def _discard(path):
        # Best-effort cleanup: the file may never have been created, and a
        # failed delete must not hide the real outcome of the request.
        try:
            os.remove(path)
        except OSError:
            pass

    # A random UUID keeps concurrent requests from colliding on one file.
    filename = f"{uuid.uuid4()}.wav"
    output_path = os.path.join(TEMP_DIR, filename)

    try:
        # NOTE(review): tts() is a synchronous call, so this coroutine
        # occupies the event loop for the whole synthesis.
        tts(request.text, output_path, request.voice, request.speed)

        # Without this task every request would leave its .wav file behind
        # in the temporary directory. The task runs after the body is sent.
        cleanup = BackgroundTasks()
        cleanup.add_task(_discard, output_path)

        return FileResponse(
            path=output_path,
            filename=filename,
            media_type="audio/wav",
            background=cleanup,
        )
    except Exception as e:
        # Remove a partially written file before reporting the failure.
        _discard(output_path)
        raise HTTPException(
            status_code=500,
            detail=f"TTS generation failed: {str(e)}",
        ) from e
|
|
|
@app.get("/tts-get/")
async def text_to_speech_get(
    text: str = Query(..., description="Text to convert to speech"),
    voice: str = Query("af_bella", description="Voice to use for TTS"),
    speed: float = Query(0.9, description="Speed of speech (0.5-1.5)")
):
    """
    GET endpoint for text-to-speech conversion

    The audio is rendered into a uniquely named file in the temporary
    directory and that file is deleted once the response has been sent.
    Any failure is reported as an HTTP 500 response.
    """
    # NOTE(review): the 0.5-1.5 range in the `speed` description is not
    # enforced here; out-of-range values are passed straight to tts().

    def _discard(path):
        # Best-effort cleanup: the file may never have been created, and a
        # failed delete must not hide the real outcome of the request.
        try:
            os.remove(path)
        except OSError:
            pass

    # A random UUID keeps concurrent requests from colliding on one file.
    filename = f"{uuid.uuid4()}.wav"
    output_path = os.path.join(TEMP_DIR, filename)

    try:
        # NOTE(review): tts() is a synchronous call, so this coroutine
        # occupies the event loop for the whole synthesis.
        tts(text, output_path, voice, speed)

        # Without this task every request would leave its .wav file behind
        # in the temporary directory. The task runs after the body is sent.
        cleanup = BackgroundTasks()
        cleanup.add_task(_discard, output_path)

        return FileResponse(
            path=output_path,
            filename=filename,
            media_type="audio/wav",
            background=cleanup,
        )
    except Exception as e:
        # Remove a partially written file before reporting the failure.
        _discard(output_path)
        raise HTTPException(
            status_code=500,
            detail=f"TTS generation failed: {str(e)}",
        ) from e
|
|
|
@app.get("/voices/")
async def available_voices():
    """
    Return a list of available voices
    """
    # The catalogue is fixed: one voice, which is also the default.
    only_voice = "af_bella"
    catalogue = {"voices": [only_voice], "default": only_voice}
    return catalogue
|
|
|
@app.get("/")
async def root():
    """
    Serve the frontend HTML
    """
    # Relative path: resolved against the process working directory, like
    # the "static" directory mounted at /static.
    index_page = 'static/index.html'
    return FileResponse(path=index_page)