"""HTTP API around the XTTS v2 model.

Exposes one endpoint, ``POST /clone-voice/``, which accepts text, a language
code and an uploaded audio clip, passes the clip to the model as
``speaker_wav`` and returns the generated WAV file.
"""

import logging
import os
import threading
import uuid

import soundfile as sf  # noqa: F401  (unused in the code shown; kept as an existing import)
import torch
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import FileResponse, JSONResponse
from TTS.api import TTS

logger = logging.getLogger(__name__)

app = FastAPI(title="XTTS Voice Cloning API")

# Load the model once at import time so every request reuses the same
# instance instead of reloading it per call.
device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/xtts_v2"
).to(device)

OUTPUT_DIR = "outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Inference is moved off the event loop into worker threads (see
# ``clone_voice``). The original code ran it inline, which implicitly
# serialized all model calls; this lock keeps that one-at-a-time behavior for
# the single shared model instance.
_tts_lock = threading.Lock()


def _synthesize(text, speaker_wav, language, file_path):
    """Run one blocking ``tts.tts_to_file`` call while holding the model lock."""
    with _tts_lock:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_wav,
            language=language,
            file_path=file_path,
        )


@app.post("/clone-voice/")
async def clone_voice(
    text: str = Form(...),
    language: str = Form(...),
    audio: UploadFile = File(...),
):
    """Generate speech for ``text`` using the uploaded clip as ``speaker_wav``.

    Args:
        text: Text to synthesize (form field).
        language: Language value forwarded unchanged to the model (form field).
        audio: Uploaded audio file; its bytes are written to a ``.wav``-named
            file under ``OUTPUT_DIR`` and that path is given to the model.

    Returns:
        A ``FileResponse`` (``audio/wav``) for the generated file on success.
        On any failure, a JSON body ``{"error": "<message>"}`` with HTTP
        status 500.
    """
    input_path = os.path.join(OUTPUT_DIR, f"{uuid.uuid4()}_input.wav")
    output_path = os.path.join(OUTPUT_DIR, f"{uuid.uuid4()}_output.wav")

    try:
        # Persist the upload because the model is handed a file path.
        payload = await audio.read()
        with open(input_path, "wb") as f:
            f.write(payload)

        # tts_to_file is a long blocking call; running it directly inside
        # this coroutine would stall every other request on the event loop.
        await run_in_threadpool(
            _synthesize, text, input_path, language, output_path
        )
    except Exception as e:
        # Top-level request boundary: log the traceback and report the
        # failure with an error status rather than a 200 response.
        logger.exception("Voice cloning request failed")
        return JSONResponse(status_code=500, content={"error": str(e)})
    finally:
        # The uploaded clip is only needed during synthesis; remove it so
        # uploads do not accumulate on disk.
        try:
            os.remove(input_path)
        except FileNotFoundError:
            pass
        except OSError:
            logger.warning("Could not remove temporary upload %s", input_path)

    # NOTE(review): generated files are left in OUTPUT_DIR, as in the original
    # code; add a retention/cleanup policy if they are not meant to be kept.
    return FileResponse(output_path, media_type="audio/wav")