"""FastAPI service that exposes Kokoro text-to-speech over HTTP."""

import contextlib
import os
import tempfile
import uuid
from typing import Optional

import numpy as np
import soundfile as sf
from fastapi import BackgroundTasks, FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel

# Import your TTS dependencies
from kokoro import KPipeline

# Initialize the TTS pipeline
pipeline = KPipeline(lang_code='a')  # Make sure lang_code matches voice

# Initialize FastAPI app
app = FastAPI(title="Kokoro TTS API Service")

# Add CORS middleware to allow frontend requests.
# NOTE(review): the FastAPI CORS documentation advises against combining
# wildcard origins with allow_credentials=True -- confirm and restrict
# allow_origins before deploying.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with your domains
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Mount static files directory
app.mount("/static", StaticFiles(directory="static"), name="static")

# Create temp directory to store audio files
TEMP_DIR = tempfile.gettempdir()
os.makedirs(TEMP_DIR, exist_ok=True)

# Sample rate (Hz) passed to soundfile when writing the generated audio.
SAMPLE_RATE = 24000


def tts(text: str, file_name: str, voice: str = 'af_bella', speed: float = 0.9) -> str:
    """
    Generate speech from text using Kokoro TTS

    Args:
        text (str): Text to convert to speech
        file_name (str): Path to save the output .wav file
        voice (str): Voice to use for TTS
        speed (float): Speed of speech

    Returns:
        str: Path to the generated audio file

    Raises:
        Exception: If the pipeline fails, yields no audio, or the file
            cannot be written. The original error is chained as the cause.
    """
    try:
        generator = pipeline(
            text,
            voice=voice,
            speed=speed,
            split_pattern=None
        )

        # The pipeline is iterated and may yield more than one segment.
        # Writing every segment to the same path would leave only the last
        # one on disk, so collect them all and write a single file.
        segments = [
            np.asarray(audio)
            for _gs, _ps, audio in generator
            if audio is not None
        ]
        if not segments:
            # Without this check no file is written and the caller would
            # be handed a path that does not exist.
            raise ValueError("no audio was produced for the given text")

        sf.write(file_name, np.concatenate(segments), SAMPLE_RATE)  # save audio file
        return file_name
    except Exception as e:
        raise Exception(f"TTS generation failed: {str(e)}") from e


class TTSRequest(BaseModel):
    """JSON body accepted by the POST /tts/ endpoint."""

    text: str
    voice: str = "af_bella"
    speed: float = 0.9


def _remove_quietly(path: str) -> None:
    """Delete *path*, ignoring the case where it is already gone."""
    with contextlib.suppress(FileNotFoundError):
        os.remove(path)


def _synthesize_to_response(text: str, voice: str, speed: float) -> FileResponse:
    """Run TTS into a uniquely named temp file and wrap it in a FileResponse.

    The generated file is removed by a background task attached to the
    response, so the temp directory does not grow with every request.

    Raises:
        HTTPException: 500 if speech generation fails.
    """
    # Generate a unique filename
    filename = f"{uuid.uuid4()}.wav"
    output_path = os.path.join(TEMP_DIR, filename)

    # NOTE(review): tts() is a plain synchronous call made from async
    # handlers, so it runs on the event loop -- confirm this is acceptable
    # for the expected load.
    try:
        tts(text, output_path, voice, speed)
    except Exception as e:
        # Do not leave a partially written file behind on failure.
        _remove_quietly(output_path)
        raise HTTPException(
            status_code=500,
            detail=f"TTS generation failed: {str(e)}"
        ) from e

    cleanup = BackgroundTasks()
    cleanup.add_task(_remove_quietly, output_path)

    # Return the audio file
    return FileResponse(
        path=output_path,
        filename=filename,
        media_type="audio/wav",
        background=cleanup,
    )


@app.post("/tts/")
async def text_to_speech(request: TTSRequest):
    """
    Convert text to speech and return a .wav file
    """
    return _synthesize_to_response(request.text, request.voice, request.speed)


@app.get("/tts-get/")
async def text_to_speech_get(
    text: str = Query(..., description="Text to convert to speech"),
    voice: str = Query("af_bella", description="Voice to use for TTS"),
    speed: float = Query(0.9, description="Speed of speech (0.5-1.5)")
):
    """
    GET endpoint for text-to-speech conversion
    """
    return _synthesize_to_response(text, voice, speed)


@app.get("/voices/")
async def available_voices():
    """
    Return a list of available voices
    """
    # This is a placeholder - you should replace with actual available voices
    # from your kokoro library
    return {
        "voices": ["af_bella"],  # Add other available voices here
        "default": "af_bella"
    }


@app.get("/")
async def root():
    """
    Serve the frontend HTML
    """
    return FileResponse('static/index.html')