don0726 committed on
Commit
81d7095
·
verified ·
1 Parent(s): 9cf8fc1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -30
app.py CHANGED
@@ -1,40 +1,48 @@
1
- import gradio as gr
2
- import torchaudio
3
- import tempfile
 
 
 
 
4
 
5
- def process(audio, text, lang):
6
- try:
7
- if audio is None:
8
- return None, "Upload audio"
9
-
10
- # load audio
11
- wav, sr = torchaudio.load(audio)
12
 
13
- if wav.shape[0] > 1:
14
- wav = wav.mean(dim=0, keepdim=True)
15
 
16
- # just return same audio (test build)
17
- out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
18
- torchaudio.save(out_path, wav, sr)
19
 
20
- return out_path, "✅ Build success (dummy output)"
21
-
22
- except Exception as e:
23
- return None, str(e)
24
 
25
 
26
- with gr.Blocks() as demo:
27
- gr.Markdown("# XTTS Server (Build Test)")
28
-
29
- audio = gr.Audio(type="filepath")
30
- text = gr.Textbox()
31
- lang = gr.Textbox(value="en")
 
 
 
 
 
32
 
33
- btn = gr.Button("Run")
 
34
 
35
- out_audio = gr.Audio()
36
- status = gr.Textbox()
 
 
 
 
 
37
 
38
- btn.click(process, [audio, text, lang], [out_audio, status])
39
 
40
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
1
+ import os
2
+ import uuid
3
+ import torch
4
+ import soundfile as sf
5
+ from fastapi import FastAPI, File, UploadFile, Form
6
+ from fastapi.responses import FileResponse
7
+ from TTS.api import TTS
8
 
# FastAPI application object that the route decorators attach to.
app = FastAPI(title="XTTS Voice Cloning API")

# The model is built a single time at import so it is not reloaded per call.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2").to(device)

# Directory for files written by this service; created if it is missing.
OUTPUT_DIR = "outputs"
os.makedirs(OUTPUT_DIR, exist_ok=True)
 
 
20
 
21
 
def _remove_quietly(path):
    """Delete ``path`` as best-effort cleanup, ignoring OS-level failures."""
    try:
        os.remove(path)
    except OSError:
        # The file may never have been created; nothing more to do.
        pass


@app.post("/clone-voice/")
async def clone_voice(
    text: str = Form(...),
    language: str = Form(...),
    audio: UploadFile = File(...)
):
    """Synthesize ``text`` using the uploaded clip as the speaker reference.

    Args:
        text: Text to speak, read from the multipart form.
        language: Language value passed unchanged to ``tts.tts_to_file``.
        audio: Uploaded recording, saved to disk and passed as ``speaker_wav``.

    Returns:
        On success, a ``FileResponse`` with media type ``audio/wav`` pointing
        at the generated file. On failure, a JSON body
        ``{"error": "<message>"}`` sent with HTTP status 500 so clients can
        detect the failure from the status code as well as the body.
    """
    # Function-scope import keeps this handler self-contained.
    from fastapi.responses import JSONResponse

    # Random names keep simultaneous requests from overwriting each other.
    input_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_input.wav"
    output_path = f"{OUTPUT_DIR}/{uuid.uuid4()}_output.wav"

    try:
        # Persist the upload because the model is given a file path.
        with open(input_path, "wb") as f:
            f.write(await audio.read())

        # NOTE(review): this is a synchronous call inside an async handler,
        # so it likely blocks the event loop while it runs - confirm whether
        # it should be moved to a worker thread.
        tts.tts_to_file(
            text=text,
            speaker_wav=input_path,
            language=language,
            file_path=output_path
        )

        return FileResponse(output_path, media_type="audio/wav")

    except Exception as e:
        # A failed run must not leave a partial output file behind.
        _remove_quietly(output_path)
        return JSONResponse(status_code=500, content={"error": str(e)})

    finally:
        # The reference clip is only needed while synthesis runs; removing it
        # stops uploads from accumulating in OUTPUT_DIR.
        _remove_quietly(input_path)