don0726 committed on
Commit
41ed8fc
·
verified ·
1 Parent(s): 16a7049

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -2
app.py CHANGED
@@ -1,9 +1,94 @@
1
- from TTS.api import TTS
2
  import torch
 
 
 
 
3
 
 
 
 
4
  device = "cpu"
5
 
 
6
  tts = TTS(
7
  model_name="tts_models/multilingual/multi-dataset/xtts_v2",
8
  progress_bar=False
9
- ).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
  import torch
3
+ import torchaudio
4
+ import tempfile
5
+ import os
6
+ from TTS.api import TTS
7
 
8
+ # ---------------------------
9
+ # Device setup (CPU only)
10
+ # ---------------------------
11
# Target device for the model; this app is configured for CPU-only hosting.
device = "cpu"

# Build the XTTS v2 model once at start-up so every request reuses it.
print("Loading XTTS model...")
tts = TTS(
    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
    progress_bar=False,
)
tts = tts.to(device)
print("Model loaded!")
19
+
20
+ # ---------------------------
21
+ # Voice cloning function
22
+ # ---------------------------
23
# Upper bound on input length; keeps CPU synthesis time bounded.
_MAX_TEXT_CHARS = 200


def _remove_quietly(path):
    """Delete *path* if it is set, ignoring filesystem errors."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup: a missing or locked temp file must not mask
        # the real result of the request.
        pass


def clone_voice(audio_file, text, lang):
    """Synthesize *text* using the uploaded sample as the speaker reference.

    Args:
        audio_file: Path to the uploaded voice sample, or ``None`` when the
            user has not provided one.
        text: Text to speak. Rejected when empty, whitespace-only, ``None``,
            or longer than ``_MAX_TEXT_CHARS`` characters.
        lang: Language code forwarded to the model (e.g. ``"en"``).
            Surrounding whitespace is stripped; a blank value is rejected.

    Returns:
        A ``(output_path, status)`` tuple. ``output_path`` is the path of the
        generated WAV file on success and ``None`` on any failure; ``status``
        is a human-readable message for the UI.
    """
    # Validate inputs up front so the user gets a specific message instead of
    # a generic exception string.
    if audio_file is None:
        return None, "❌ Please upload audio"

    if not text or not text.strip():
        return None, "❌ Please enter text"

    if len(text) > _MAX_TEXT_CHARS:
        return None, f"❌ Text too long (max {_MAX_TEXT_CHARS} chars for CPU)"

    lang = (lang or "").strip()
    if not lang:
        return None, "❌ Please enter a language code"

    speaker_path = None
    output_path = None
    try:
        waveform, sr = torchaudio.load(audio_file)

        # Down-mix multi-channel audio to a single channel.
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Reserve a temp path for the mono speaker sample; the handle is
        # closed before writing so the save works on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
            speaker_path = tmp.name
        torchaudio.save(speaker_path, waveform, sr)

        # Reserve the output path the same way, without leaking the handle.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as out:
            output_path = out.name

        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_path,
            language=lang,
            file_path=output_path,
            speed=1.1,  # slight speed boost
        )

        return output_path, "✅ Success"

    except Exception as e:
        # UI boundary: report the failure to the user rather than crashing
        # the handler, and do not leave a partial output file behind.
        _remove_quietly(output_path)
        return None, f"❌ Error: {e}"

    finally:
        # The speaker sample is only needed during synthesis.
        _remove_quietly(speaker_path)
63
+
64
+
65
+ # ---------------------------
66
+ # Gradio UI
67
+ # ---------------------------
68
# Page layout: inputs on top, a generate button, then the result widgets.
with gr.Blocks() as demo:
    gr.Markdown("# 🎤 XTTS Voice Cloning (CPU Space)")
    gr.Markdown("Upload a voice sample, enter text, choose language")

    with gr.Row():
        # type="filepath" hands clone_voice a path on disk rather than raw samples.
        audio_input = gr.Audio(type="filepath", label="🎙 Sample Voice")
        text_input = gr.Textbox(label="📝 Text", placeholder="Enter text here...")

    lang_input = gr.Textbox(
        label="🌐 Language Code",
        value="en",
        placeholder="en, hi, fr, de...",
    )

    generate_btn = gr.Button("🚀 Generate")

    output_audio = gr.Audio(label="🔊 Output")
    status = gr.Textbox(label="Status")

    # clone_voice returns (output_path, status), matching the two outputs.
    generate_btn.click(
        fn=clone_voice,
        inputs=[audio_input, text_input, lang_input],
        outputs=[output_audio, status],
    )

# Required for Hugging Face Spaces
demo.launch(server_name="0.0.0.0", server_port=7860)