Spaces:

don0726
/

xtts

Build error

don0726 committed on Mar 20

Commit

81d7095

verified ·

1 Parent(s): 9cf8fc1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,40 +1,48 @@
-import gradio as gr
-import torchaudio
-import tempfile
-def process(audio, text, lang):
-    try:
-        if audio is None:
-            return None, "Upload audio"
-        # load audio
-        wav, sr = torchaudio.load(audio)
-        if wav.shape[0] > 1:
-            wav = wav.mean(dim=0, keepdim=True)
-        # just return same audio (test build)
-        out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
-        torchaudio.save(out_path, wav, sr)
-        return out_path, "✅ Build success (dummy output)"
-    except Exception as e:
-        return None, str(e)
-with gr.Blocks() as demo:
-    gr.Markdown("# XTTS Server (Build Test)")
-    audio = gr.Audio(type="filepath")
-    text = gr.Textbox()
-    lang = gr.Textbox(value="en")
-    btn = gr.Button("Run")
-    out_audio = gr.Audio()
-    status = gr.Textbox()
-    btn.click(process, [audio, text, lang], [out_audio, status])
-demo.launch(server_name="0.0.0.0", server_port=7860)

+import os
+import uuid
+import torch
+import soundfile as sf
+from fastapi import FastAPI, File, UploadFile, Form
+from fastapi.responses import FileResponse
+from TTS.api import TTS
+app = FastAPI(title="XTTS Voice Cloning API")
+# The XTTS model is built once at import time; prefer the GPU when torch sees one.
+if torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")
+tts = tts.to(device)
+# Directory that receives both the uploaded and the generated WAV files.
+OUTPUT_DIR = "outputs"
+os.makedirs(OUTPUT_DIR, exist_ok=True)
+@app.post("/clone-voice/")
+async def clone_voice(
+    text: str = Form(...),
+    language: str = Form(...),
+    audio: UploadFile = File(...)
+):
+    """Synthesize ``text`` using the uploaded clip as the speaker reference.
+    The upload is written to OUTPUT_DIR, passed to ``tts.tts_to_file`` as
+    ``speaker_wav`` together with ``language``, and the generated file is
+    returned as an ``audio/wav`` FileResponse. On any exception the response
+    is ``{"error": <message>}`` with HTTP status 500. The uploaded copy is
+    removed before returning; the generated file is left in OUTPUT_DIR.
+    """
+    # Local import: JSONResponse is not among the file's top-level imports.
+    from fastapi.responses import JSONResponse
+    input_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_input.wav"
+    output_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_output.wav"
+    try:
+        # Persist the upload because tts_to_file is given a path, not bytes.
+        with open(input_path, "wb") as f:
+            f.write(await audio.read())
+        tts.tts_to_file(
+            text=text,
+            speaker_wav=input_path,
+            language=language,
+            file_path=output_path
+        )
+        return FileResponse(output_path, media_type="audio/wav")
+    except Exception as e:
+        # Same body as before, but with an error status so clients can
+        # distinguish a failure from a successful audio response.
+        return JSONResponse(status_code=500, content={"error": str(e)})
+    finally:
+        # The reference clip is only needed during synthesis; delete it so
+        # OUTPUT_DIR does not accumulate one upload per request.
+        if os.path.exists(input_path):
+            os.remove(input_path)