Successmove committed on
Commit
08a0d1e
verified
1 Parent(s): b639183

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -0
app.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import tempfile
4
+ import os
5
+ from TTS.api import TTS
6
+
# Pick the compute device: CUDA when torch reports it as available, else CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Instantiate the XTTS v2 model once at import time and move it to the device.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

# Hard-coded language codes offered in the UI's language dropdown.
supported_languages = [
    "en",
    "es",
    "fr",
    "de",
    "it",
    "pt",
    "pl",
    "tr",
    "ru",
    "nl",
    "cs",
    "ar",
    "zh-cn",
    "ja",
    "hu",
    "ko",
]
19
+
def generate_speech(
    text,
    language,
    speaker_wav=None,
    voice_preset=None,
    speed=1.0,
    temperature=0.7
):
    """Synthesize ``text`` with the module-level XTTS model into a WAV file.

    Args:
        text: Text to convert to speech. Must contain at least one
            non-whitespace character.
        language: Language code forwarded to the model as ``language``.
        speaker_wav: Optional path to a reference recording. When provided it
            is forwarded as ``speaker_wav`` (voice cloning); otherwise the
            argument is left out of the model call entirely.
        voice_preset: Accepted for interface compatibility; not used.
        speed: Forwarded to ``tts.tts_to_file`` as ``speed``.
        temperature: Forwarded to ``tts.tts_to_file`` as ``temperature``.

    Returns:
        Path of the temporary ``.wav`` file written by the model. The file is
        created with ``delete=False``, so it is not removed automatically.

    Raises:
        gr.Error: If ``text`` is empty/blank, or if synthesis fails (the
            underlying exception is attached as ``__cause__``).
    """
    # Reject blank input before allocating a temp file, so nothing is leaked.
    if not text or not text.strip():
        raise gr.Error("Error generating speech: input text is empty")

    # Arguments shared by both the cloning and the default-voice paths.
    synthesis_kwargs = {
        "text": text,
        "language": language,
        "speed": speed,
        "temperature": temperature,
    }
    # Only pass a reference voice when one was actually supplied.
    if speaker_wav:
        synthesis_kwargs["speaker_wav"] = speaker_wav

    # Reserve an output path; delete=False keeps the file after the handle
    # is closed so the model can write to it and the caller can read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
        output_path = tmp_file.name

    try:
        tts.tts_to_file(file_path=output_path, **synthesis_kwargs)
    except Exception as e:
        # Remove the placeholder file so failed requests do not pile up.
        if os.path.exists(output_path):
            os.unlink(output_path)
        raise gr.Error(f"Error generating speech: {str(e)}") from e

    return output_path
62
+
def _generate_from_ui(text, language, speaker_wav, speed, temperature):
    """Adapt the five UI inputs to ``generate_speech`` via keyword arguments.

    ``generate_speech`` declares ``voice_preset`` between ``speaker_wav`` and
    ``speed``. Passing the five UI values positionally would shift the speed
    slider into ``voice_preset`` and the temperature slider into ``speed``,
    so the values are bound by name here instead.
    """
    return generate_speech(
        text,
        language,
        speaker_wav=speaker_wav,
        speed=speed,
        temperature=temperature,
    )


# Create Gradio interface
with gr.Blocks(title="XTTS Text-to-Speech") as demo:
    gr.Markdown("# XTTS Text-to-Speech Generator")
    gr.Markdown("Generate speech from text with voice cloning capabilities using XTTS v2")

    with gr.Row():
        # Left column: all inputs and the trigger button.
        with gr.Column():
            text_input = gr.Textbox(
                label="Input Text",
                placeholder="Enter text to convert to speech...",
                lines=3
            )

            language_input = gr.Dropdown(
                label="Language",
                choices=[(lang, lang) for lang in supported_languages],
                value="en",
                info="Select the language for synthesis"
            )

            speaker_wav_input = gr.Audio(
                label="Reference Voice (Optional)",
                type="filepath"
            )
            # The hint is rendered as Markdown rather than passed as an
            # ``info=`` keyword, so it does not depend on the Audio component
            # accepting that argument.
            gr.Markdown("Upload a 3-10 second audio sample for voice cloning")

            with gr.Accordion("Advanced Settings", open=False):
                speed_input = gr.Slider(
                    label="Speed",
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    info="Speech speed (0.5 = slow, 2.0 = fast)"
                )

                temperature_input = gr.Slider(
                    label="Temperature",
                    minimum=0.1,
                    maximum=1.0,
                    value=0.7,
                    step=0.1,
                    info="Voice variability (lower = more deterministic)"
                )

            generate_btn = gr.Button("Generate Speech", variant="primary")

        # Right column: the synthesized audio.
        with gr.Column():
            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath"
            )

    # NOTE(review): with cache_examples=True the examples are presumably
    # synthesized ahead of time without a reference voice -- confirm the
    # model can synthesize when no speaker_wav is supplied.
    gr.Examples(
        examples=[
            ["Hello, world! This is a sample text to speech generation.", "en"],
            ["Bonjour, comment allez-vous aujourd'hui?", "fr"],
            ["Hola, ¿cómo estás?", "es"],
        ],
        inputs=[text_input, language_input],
        outputs=audio_output,
        fn=generate_speech,
        cache_examples=True
    )

    # Route the click through the adapter so each slider reaches the
    # parameter it is labelled for.
    generate_btn.click(
        fn=_generate_from_ui,
        inputs=[
            text_input,
            language_input,
            speaker_wav_input,
            speed_input,
            temperature_input
        ],
        outputs=audio_output
    )
139
+
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(
        server_port=7860,
        server_name="0.0.0.0",
    )