geopromini committed on
Commit
792e819
·
verified ·
1 Parent(s): 1fd11d5

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -67
app.py CHANGED
@@ -6,6 +6,7 @@ import gradio as gr
6
  import soundfile as sf
7
  import tempfile
8
  import torch
 
9
  from vieneu_tts import VieNeuTTS
10
  import time
11
 
@@ -28,7 +29,7 @@ except Exception as e:
28
  return np.random.uniform(-0.1, 0.1, 24000*2)
29
  tts = MockTTS()
30
 
31
- # --- 2. DATA ---
32
  VOICE_SAMPLES = {
33
  "Tuyên (nam miền Bắc)": {"audio": "./sample/Tuyên (nam miền Bắc).wav", "text": "./sample/Tuyên (nam miền Bắc).txt"},
34
  "Thiện Tâm": {"audio": "./sample/thientam.mp3", "text": "./sample/thientam.txt"},
@@ -54,13 +55,22 @@ def load_reference_info(voice_choice):
54
  return None, ""
55
 
56
  @spaces.GPU(duration=120)
57
- def synthesize_speech(text, voice_choice, custom_audio, custom_text, mode_tab):
58
  try:
59
  if not text or text.strip() == "":
60
  return None, "⚠️ Vui lòng nhập nội dung!"
61
- if len(text) > 250:
62
- return None, f"❌ Giới hạn 250 tự (Hiện tại: {len(text)})."
63
-
 
 
 
 
 
 
 
 
 
64
  if mode_tab == "custom_mode":
65
  if custom_audio is None or not custom_text:
66
  return None, "⚠️ Thiếu Audio mẫu hoặc Text mẫu."
@@ -71,126 +81,133 @@ def synthesize_speech(text, voice_choice, custom_audio, custom_text, mode_tab):
71
  with open(VOICE_SAMPLES[voice_choice]["text"], "r", encoding="utf-8") as f:
72
  ref_text_raw = f.read()
73
 
 
74
  start_time = time.time()
75
  ref_codes = tts.encode_reference(ref_audio_path)
76
- wav = tts.infer(text, ref_codes, ref_text_raw)
 
 
 
 
 
 
77
  process_time = time.time() - start_time
78
 
79
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
80
  sf.write(tmp_file.name, wav, 24000)
81
  output_path = tmp_file.name
82
 
83
- return output_path, f"⚡ Xử lý: {process_time:.2f}s"
84
  except Exception as e:
85
  return None, f"❌ Lỗi: {str(e)}"
86
 
87
- # --- 4. THEME & CSS SETUP ---
88
  theme = gr.themes.Default(
89
  primary_hue="indigo",
90
- secondary_hue="cyan",
91
  neutral_hue="slate",
92
  font=[gr.themes.GoogleFont('Inter'), 'sans-serif'],
93
  ).set(
94
- body_background_fill="#070b14",
95
  block_background_fill="#0f172a",
96
  block_border_width="1px",
97
- block_label_text_color="#94a3b8",
98
- button_primary_background_fill="linear-gradient(135deg, #6366f1 0%, #a855f7 100%)",
99
- button_primary_background_fill_hover="linear-gradient(135deg, #4f46e5 0%, #9333ea 100%)",
100
- button_primary_text_color="white",
101
  input_background_fill="#1e293b",
102
  input_border_color="#334155",
 
103
  )
104
 
105
  css = """
106
- .gradio-container { background-color: #070b14 !important; border: none !important; }
107
- .main-wrap { max-width: 1100px !important; margin: auto !important; padding: 40px 20px !important; }
108
- .header-area { text-align: center; margin-bottom: 40px; }
109
- .header-area h1 {
110
- background: linear-gradient(90deg, #60a5fa, #c084fc);
111
- -webkit-background-clip: text; -webkit-text-fill-color: transparent;
112
- font-size: 3rem !important; font-weight: 900 !important; letter-spacing: -1px; margin: 0;
113
- }
114
- .header-area p { color: #64748b; font-size: 1.1rem; margin-top: 10px; }
115
  .st-card {
116
- border-radius: 20px !important;
117
- border: 1px solid rgba(255,255,255,0.08) !important;
118
- box-shadow: 0 20px 50px rgba(0,0,0,0.3) !important;
119
  padding: 15px;
120
  }
121
  .result-card {
122
- margin-top: 25px;
123
- background: rgba(15, 23, 42, 0.6) !important;
124
- border: 1px dashed rgba(99, 102, 241, 0.3) !important;
125
  }
126
- .footer { text-align: center; margin-top: 50px; color: #334155; font-size: 0.8rem; letter-spacing: 1px; }
127
- audio { filter: invert(90%) hue-rotate(180deg) brightness(1.5); width: 100%; }
128
  """
129
 
130
  # --- 5. UI CONSTRUCTION ---
131
- with gr.Blocks(title="VieNeu-TTS Studio") as demo:
132
 
133
  with gr.Column(elem_classes="main-wrap"):
134
- gr.HTML("""
135
- <div class="header-area">
136
- <h1>VieNeu Studio</h1>
137
- <p>Nền tảng chuyển đổi giọng nói AI chuyên nghiệp</p>
138
- </div>
139
- """)
140
-
141
  with gr.Row(equal_height=True):
142
- # Cột trái
143
  with gr.Column(scale=1):
144
  with gr.Group(elem_classes="st-card"):
145
  text_input = gr.Textbox(
146
- label="VĂN BẢN ĐẦU VÀO",
147
- placeholder="Hãy nhập nội dung bạn muốn AI chuyển thành giọng nói...",
148
- lines=11,
149
  show_label=True,
150
  )
151
- char_count = gr.HTML("<div style='text-align: right; color: #475569; font-size: 0.85rem; padding: 5px;'>0 / 250</div>")
152
 
153
- # Cột phải
154
  with gr.Column(scale=1):
155
  with gr.Tabs() as tabs:
156
- with gr.TabItem("👤 Giọng Mẫu", id="preset_mode"):
157
  voice_select = gr.Dropdown(
158
  choices=list(VOICE_SAMPLES.keys()),
159
  value="Tuyên (nam miền Bắc)",
160
- label="Chọn nghệ đọc",
161
  )
162
  with gr.Accordion("Nghe thử giọng mẫu", open=False):
163
  ref_audio_preview = gr.Audio(interactive=False, show_label=False)
164
  ref_text_preview = gr.Markdown("...")
165
 
166
- with gr.TabItem("🎙️ Tự Clone", id="custom_mode"):
167
- gr.Markdown("<p style='color: #94a3b8; font-size: 0.8rem;'>Tải lên audio giọng nói để hệ thống mô phỏng.</p>")
168
- custom_audio = gr.Audio(label="Audio mẫu (.wav/mp3)", type="filepath")
169
- custom_text = gr.Textbox(label="Nội dung audio mẫu", placeholder="Nhập chính xác lời thoại của audio...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  current_mode = gr.State(value="preset_mode")
172
 
173
  gr.Markdown("<br>")
174
- btn_generate = gr.Button("TỔNG HỢP NGAY", variant="primary", size="lg")
175
 
176
- # Khu vực kết quả đã sửa lỗi 'style'
177
  with gr.Group(elem_classes="st-card result-card"):
178
- audio_output = gr.Audio(label="KẾT QUẢ AUDIO", interactive=False, autoplay=True)
179
- status_output = gr.Markdown("<p style='text-align: center; color: #6366f1; margin-top:10px;'>✨ Sẵn sàng thực hiện</p>")
180
 
181
- gr.HTML("<div class='footer'>POWERED BY VIENEU-TTS ENGINE • 2025</div>")
182
 
183
  # --- LOGIC ---
184
  def update_count(text):
185
  l = len(text)
186
- color = "#475569" if l <= 250 else "#f43f5e"
187
- return f"<div style='text-align: right; color: {color}; font-size: 0.85rem; font-weight: 600; padding: 5px;'>{l} / 250</div>"
188
 
189
  text_input.change(update_count, text_input, char_count)
190
 
191
  def update_ref_preview(voice):
192
  audio, text = load_reference_info(voice)
193
- return audio, f"> \"{text}\""
194
 
195
  voice_select.change(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
196
  demo.load(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
@@ -200,15 +217,9 @@ with gr.Blocks(title="VieNeu-TTS Studio") as demo:
200
 
201
  btn_generate.click(
202
  fn=synthesize_speech,
203
- inputs=[text_input, voice_select, custom_audio, custom_text, current_mode],
204
  outputs=[audio_output, status_output]
205
  )
206
 
207
  if __name__ == "__main__":
208
- # Đưa theme css vào launch() để tránh cảnh báo trên Gradio 6.0+
209
- demo.queue().launch(
210
- theme=theme,
211
- css=css,
212
- server_name="0.0.0.0",
213
- server_port=7860
214
- )
 
6
  import soundfile as sf
7
  import tempfile
8
  import torch
9
+ import librosa # Thêm thư viện xử lý âm thanh
10
  from vieneu_tts import VieNeuTTS
11
  import time
12
 
 
29
  return np.random.uniform(-0.1, 0.1, 24000*2)
30
  tts = MockTTS()
31
 
32
+ # --- 2. DATA (Giữ nguyên danh sách giọng mẫu) ---
33
  VOICE_SAMPLES = {
34
  "Tuyên (nam miền Bắc)": {"audio": "./sample/Tuyên (nam miền Bắc).wav", "text": "./sample/Tuyên (nam miền Bắc).txt"},
35
  "Thiện Tâm": {"audio": "./sample/thientam.mp3", "text": "./sample/thientam.txt"},
 
55
  return None, ""
56
 
57
  @spaces.GPU(duration=120)
58
+ def synthesize_speech(text, voice_choice, custom_audio, custom_text, mode_tab, pause_level, speed_value):
59
  try:
60
  if not text or text.strip() == "":
61
  return None, "⚠️ Vui lòng nhập nội dung!"
62
+
63
+ # 3.1. Xử lý độ ngắt nghỉ (Pause level)
64
+ processed_text = text
65
+ if pause_level == "Trung bình":
66
+ processed_text = processed_text.replace(",", ", , ").replace(".", ". . ")
67
+ elif pause_level == "Dài":
68
+ processed_text = processed_text.replace(",", ", , , ").replace(".", ". . . . ")
69
+
70
+ if len(processed_text) > 400:
71
+ processed_text = processed_text[:400]
72
+
73
+ # 3.2. Lấy dữ liệu Reference
74
  if mode_tab == "custom_mode":
75
  if custom_audio is None or not custom_text:
76
  return None, "⚠️ Thiếu Audio mẫu hoặc Text mẫu."
 
81
  with open(VOICE_SAMPLES[voice_choice]["text"], "r", encoding="utf-8") as f:
82
  ref_text_raw = f.read()
83
 
84
+ # 3.3. Thực hiện Inference
85
  start_time = time.time()
86
  ref_codes = tts.encode_reference(ref_audio_path)
87
+ wav = tts.infer(processed_text, ref_codes, ref_text_raw)
88
+
89
+ # 3.4. Điều chỉnh Tốc độ (Speed) bằng librosa
90
+ if speed_value != 1.0:
91
+ # Time stretch giữ nguyên pitch
92
+ wav = librosa.effects.time_stretch(wav, rate=float(speed_value))
93
+
94
  process_time = time.time() - start_time
95
 
96
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
97
  sf.write(tmp_file.name, wav, 24000)
98
  output_path = tmp_file.name
99
 
100
+ return output_path, f"⚡ Xử lý: {process_time:.2f}s | Tốc độ: {speed_value}x"
101
  except Exception as e:
102
  return None, f"❌ Lỗi: {str(e)}"
103
 
104
+ # --- 4. THEME & CSS ---
105
  theme = gr.themes.Default(
106
  primary_hue="indigo",
107
+ secondary_hue="blue",
108
  neutral_hue="slate",
109
  font=[gr.themes.GoogleFont('Inter'), 'sans-serif'],
110
  ).set(
111
+ body_background_fill="#020617",
112
  block_background_fill="#0f172a",
113
  block_border_width="1px",
 
 
 
 
114
  input_background_fill="#1e293b",
115
  input_border_color="#334155",
116
+ button_primary_background_fill="linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%)",
117
  )
118
 
119
  css = """
120
+ .main-wrap { max-width: 1200px !important; margin: auto !important; padding: 20px !important; }
 
 
 
 
 
 
 
 
121
  .st-card {
122
+ border-radius: 16px !important;
123
+ border: 1px solid rgba(255,255,255,0.1) !important;
124
+ box-shadow: 0 4px 20px rgba(0,0,0,0.5) !important;
125
  padding: 15px;
126
  }
127
  .result-card {
128
+ background: linear-gradient(180deg, rgba(15, 23, 42, 0.8) 0%, rgba(30, 41, 59, 0.8) 100%) !important;
129
+ border: 1px solid rgba(99, 102, 241, 0.2) !important;
130
+ margin-top: 15px;
131
  }
132
+ audio { filter: invert(90%) hue-rotate(180deg) brightness(1.5); width: 100%; border-radius: 8px; }
133
+ .footer { text-align: center; margin-top: 40px; color: #475569; font-size: 0.8rem; font-weight: 500; }
134
  """
135
 
136
  # --- 5. UI CONSTRUCTION ---
137
+ with gr.Blocks(title="AI Voice Studio") as demo:
138
 
139
  with gr.Column(elem_classes="main-wrap"):
 
 
 
 
 
 
 
140
  with gr.Row(equal_height=True):
141
+ # CỘT TRÁI
142
  with gr.Column(scale=1):
143
  with gr.Group(elem_classes="st-card"):
144
  text_input = gr.Textbox(
145
+ label="VĂN BẢN CẦN CHUYỂN ĐỔI",
146
+ placeholder="Nhập nội dung vào đây...",
147
+ lines=20, # Tăng thêm để cân bằng với các nút mới
148
  show_label=True,
149
  )
150
+ char_count = gr.HTML("<div style='text-align: right; color: #6366f1; font-size: 0.85rem; font-weight: bold; padding: 5px;'>0 / 250</div>")
151
 
152
+ # CỘT PHẢI
153
  with gr.Column(scale=1):
154
  with gr.Tabs() as tabs:
155
+ with gr.TabItem("👤 Nghệ sĩ đọc", id="preset_mode"):
156
  voice_select = gr.Dropdown(
157
  choices=list(VOICE_SAMPLES.keys()),
158
  value="Tuyên (nam miền Bắc)",
159
+ label="Lựa chọn giọng đọc mẫu",
160
  )
161
  with gr.Accordion("Nghe thử giọng mẫu", open=False):
162
  ref_audio_preview = gr.Audio(interactive=False, show_label=False)
163
  ref_text_preview = gr.Markdown("...")
164
 
165
+ with gr.TabItem("🎙️ Nhân bản (Clone)", id="custom_mode"):
166
+ custom_audio = gr.Audio(label="Audio gốc", type="filepath")
167
+ custom_text = gr.Textbox(
168
+ label="NỘI DUNG AUDIO MẪU",
169
+ placeholder="Nhập lời thoại của audio mẫu...",
170
+ lines=4,
171
+ show_label=True
172
+ )
173
+
174
+ # --- KHU VỰC ĐIỀU CHỈNH ÂM THANH ---
175
+ with gr.Row():
176
+ pause_level = gr.Radio(
177
+ choices=["Mặc định", "Trung bình", "Dài"],
178
+ value="Mặc định",
179
+ label="Độ ngắt nghỉ",
180
+ scale=1
181
+ )
182
+ speed_select = gr.Dropdown(
183
+ choices=[0.8, 0.9, 1.0, 1.1, 1.2, 1.5],
184
+ value=1.0,
185
+ label="Tốc độ đọc",
186
+ scale=1
187
+ )
188
 
189
  current_mode = gr.State(value="preset_mode")
190
 
191
  gr.Markdown("<br>")
192
+ btn_generate = gr.Button("BẮT ĐẦU TỔNG HỢP", variant="primary", size="lg")
193
 
 
194
  with gr.Group(elem_classes="st-card result-card"):
195
+ audio_output = gr.Audio(label="AUDIO KẾT QUẢ", interactive=False, autoplay=True)
196
+ status_output = gr.Markdown("<p style='text-align: center; color: #818cf8; font-weight: 500;'>✨ Sẵn sàng thực hiện</p>")
197
 
198
+ gr.HTML("<div class='footer'>ENGINE BY VIENEU-TTS • PROFESSIONAL AI SOLUTIONS 2025</div>")
199
 
200
  # --- LOGIC ---
201
  def update_count(text):
202
  l = len(text)
203
+ color = "#6366f1" if l <= 250 else "#f43f5e"
204
+ return f"<div style='text-align: right; color: {color}; font-size: 0.85rem; font-weight: bold; padding: 5px;'>{l} / 250</div>"
205
 
206
  text_input.change(update_count, text_input, char_count)
207
 
208
  def update_ref_preview(voice):
209
  audio, text = load_reference_info(voice)
210
+ return audio, f"**Nội dung mẫu:** *\"{text}\"*"
211
 
212
  voice_select.change(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
213
  demo.load(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
 
217
 
218
  btn_generate.click(
219
  fn=synthesize_speech,
220
+ inputs=[text_input, voice_select, custom_audio, custom_text, current_mode, pause_level, speed_select],
221
  outputs=[audio_output, status_output]
222
  )
223
 
224
  if __name__ == "__main__":
225
+ demo.queue().launch(theme=theme, css=css, server_name="0.0.0.0", server_port=7860)