LTTEAM committed on
Commit
1fb759a
·
verified ·
1 Parent(s): dbc2e4d

Update fix

Browse files
Files changed (1) hide show
  1. fix +426 -385
fix CHANGED
@@ -1,386 +1,427 @@
1
- import json
2
- import asyncio
3
- import os
4
- import re
5
- import subprocess
6
- from pathlib import Path
7
-
8
- import gradio as gr
9
- from edge_tts import Communicate
10
- from gtts import gTTS
11
-
12
- VOICES_FILE = Path(__file__).parent / "voices.json"
13
- with open(VOICES_FILE, encoding="utf-8") as f:
14
- voices_data = json.load(f)
15
-
16
- languages = sorted(voices_data.keys())
17
- def get_genders(lang):
18
- return sorted(voices_data.get(lang, {}).keys())
19
- def get_edge_voices(lang, gender):
20
- return [v["display_name"] for v in voices_data.get(lang, {}).get(gender, [])]
21
-
22
- # Map Edge display_name → voice_code
23
- edge_voice_map = {
24
- v["display_name"]: v["voice_code"]
25
- for block in voices_data.values()
26
- for gender_block in block.values()
27
- for v in gender_block
28
- }
29
-
30
-
31
- MAX_CHARS = 5000 # Edge-TTS giới hạn ~5000 ký tự
32
-
33
- def split_text(text, max_chars=MAX_CHARS):
34
- parts = re.split(r'(?<=[。!?.!?,;:])', text)
35
- segments, buf = [], ""
36
- for part in parts:
37
- if len(buf) + len(part) <= max_chars:
38
- buf += part
39
- else:
40
- if buf:
41
- segments.append(buf)
42
- while len(part) > max_chars:
43
- segments.append(part[:max_chars])
44
- part = part[max_chars:]
45
- buf = part
46
- if buf:
47
- segments.append(buf)
48
- return segments
49
-
50
- def merge_audios(seg_paths, output_path):
51
- list_file = "concat_list.txt"
52
- with open(list_file, "w", encoding="utf-8") as f:
53
- for p in seg_paths:
54
- f.write(f"file '{os.path.abspath(p)}'\n")
55
- subprocess.run([
56
- "ffmpeg", "-y", "-f", "concat", "-safe", "0",
57
- "-i", list_file, "-c", "copy", output_path
58
- ], check=True)
59
- os.remove(list_file)
60
- for p in seg_paths:
61
- os.remove(p)
62
-
63
- def get_audio_duration(path):
64
- # trả về duration in seconds (float) qua ffprobe
65
- out = subprocess.check_output([
66
- "ffprobe", "-v", "error",
67
- "-show_entries", "format=duration",
68
- "-of", "default=noprint_wrappers=1:nokey=1",
69
- path
70
- ])
71
- return float(out.strip())
72
-
73
- async def _edge_tts(text, voice_code, rate, pitch, out_path):
74
- tts = Communicate(text=text, voice=voice_code, rate=rate, pitch=pitch)
75
- await tts.save(out_path)
76
-
77
- def run_edge_tts(text, voice_name, rate_slider, pitch_slider, out_path):
78
- rate = f"{rate_slider:+d}%"
79
- pitch = f"{pitch_slider:+d}Hz"
80
- voice_code = edge_voice_map[voice_name]
81
- if os.path.exists(out_path):
82
- os.remove(out_path)
83
- asyncio.run(_edge_tts(text, voice_code, rate, pitch, out_path))
84
-
85
- def run_google_tts(text, lang_code, out_path):
86
- # lang_code lấy từ ngôn ngữ như 'vi' hoặc 'en'
87
- tts = gTTS(text=text, lang=lang_code)
88
- tts.save(out_path)
89
-
90
- def build_srt(segments, durations, srt_path):
91
- def fmt(ts):
92
- h = int(ts // 3600)
93
- m = int((ts%3600)//60)
94
- s = int(ts%60)
95
- ms = int((ts - int(ts))*1000)
96
- return f"{h:02}:{m:02}:{s:02},{ms:03}"
97
- with open(srt_path, "w", encoding="utf-8") as f:
98
- cum = 0.0
99
- for i, (seg, dur) in enumerate(zip(segments, durations), start=1):
100
- start = fmt(cum)
101
- end = fmt(cum + dur)
102
- f.write(f"{i}\n{start} --> {end}\n{seg.strip()}\n\n")
103
- cum += dur
104
-
105
-
106
- def generate_tts(text, engine, lang, gender, voice_name, rate, pitch):
107
- if not text.strip():
108
- return None, None
109
- segments = split_text(text)
110
- seg_files, durations = [], []
111
- # choose synth per engine
112
- for idx, seg in enumerate(segments):
113
- seg_path = f"seg_{idx}.mp3"
114
- if engine == "Edge":
115
- run_edge_tts(seg, voice_name, rate, pitch, seg_path)
116
- else:
117
- # google chỉ dùng mã 'vi' nếu Vietnamese, else default 'en'
118
- lang_code = "vi" if "Việt" in lang else "en"
119
- run_google_tts(seg, lang_code, seg_path)
120
- seg_files.append(seg_path)
121
- durations.append(get_audio_duration(seg_path))
122
- out_audio = "LyTranTTS.mp3"
123
- merge_audios(seg_files, out_audio)
124
- # build .srt
125
- srt_file = "LyTranTTS.srt"
126
- build_srt(segments, durations, srt_file)
127
- return out_audio, srt_file
128
-
129
-
130
- def preview_voice(engine, lang, gender, voice_name, rate, pitch):
131
- text = "Xin chào! Đây là ứng dụng chuyển văn bản thành giọng, nói được phát triển bởi Lý Trần." if "Việt" in lang else "Hello, This is a Text to Speech, Speech App developed by Ly Tran"
132
- out = "LyTranTTS.mp3"
133
- if engine == "Edge":
134
- run_edge_tts(text, voice_name, rate, pitch, out)
135
- else:
136
- lang_code = "vi" if "Việt" in lang else "en"
137
- run_google_tts(text, lang_code, out)
138
- return out
139
-
140
-
141
- def on_language_change(lang):
142
- genders = get_genders(lang)
143
- default_gender = genders[0] if genders else None
144
- voices = get_edge_voices(lang, default_gender) if default_gender else []
145
- default_voice = voices[0] if voices else None
146
- return (
147
- gr.update(choices=genders, value=default_gender),
148
- gr.update(choices=voices, value=default_voice),
149
- )
150
-
151
- def on_gender_change(lang, gender):
152
- voices = get_edge_voices(lang, gender)
153
- default_voice = voices[0] if voices else None
154
- return gr.update(choices=voices, value=default_voice)
155
-
156
-
157
- DEFAULT_LANG = "Đa Ngôn Ngữ"
158
-
159
- # Custom CSS for better styling
160
- custom_css = """
161
- :root {
162
- --primary: #4f46e5;
163
- --secondary: #f9fafb;
164
- --accent: #10b981;
165
- --text: #1f2937;
166
- --border: #e5e7eb;
167
- }
168
-
169
- body {
170
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
171
- }
172
-
173
- .gradio-container {
174
- max-width: 900px !important;
175
- margin: 0 auto;
176
- background-color: white;
177
- border-radius: 12px;
178
- box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
179
- }
180
-
181
- h1 {
182
- color: var(--primary) !important;
183
- font-weight: 700 !important;
184
- margin-bottom: 0.5rem !important;
185
- }
186
-
187
- .description {
188
- color: var(--text) !important;
189
- margin-bottom: 1.5rem !important;
190
- font-size: 1rem !important;
191
- }
192
-
193
- .input-section, .output-section {
194
- background-color: var(--secondary);
195
- padding: 1.5rem;
196
- border-radius: 8px;
197
- margin-bottom: 1.5rem;
198
- border: 1px solid var(--border);
199
- }
200
-
201
- .input-section label, .output-section label {
202
- font-weight: 600 !important;
203
- color: var(--text) !important;
204
- }
205
-
206
- .tab {
207
- background-color: white !important;
208
- border-radius: 8px !important;
209
- padding: 1rem !important;
210
- }
211
-
212
- button {
213
- background-color: var(--primary) !important;
214
- color: white !important;
215
- border: none !important;
216
- border-radius: 6px !important;
217
- padding: 0.5rem 1rem !important;
218
- font-weight: 500 !important;
219
- transition: all 0.2s !important;
220
- }
221
-
222
- button:hover {
223
- background-color: #4338ca !important;
224
- transform: translateY(-1px) !important;
225
- }
226
-
227
- .preview-btn {
228
- background-color: var(--accent) !important;
229
- }
230
-
231
- .preview-btn:hover {
232
- background-color: #0d9b6e !important;
233
- }
234
-
235
- .slider-container {
236
- margin-top: 1rem;
237
- }
238
-
239
- .slider-container label {
240
- margin-bottom: 0.5rem;
241
- display: block;
242
- }
243
-
244
- footer {
245
- text-align: center;
246
- margin-top: 2rem;
247
- color: #6b7280;
248
- font-size: 0.875rem;
249
- }
250
- """
251
-
252
- with gr.Blocks(title="LyTranTTS - Phiên bản miễn phí", css=custom_css) as demo:
253
- # Header Section
254
- with gr.Row():
255
- gr.Markdown("""
256
- <div style="text-align: center; width: 100%;">
257
- <h1 style="margin-bottom: 0;">LyTranTTS - Phiên bản miễn phí</h1>
258
- <p style="color: #6b7280; margin-top: 0.5rem;">Công cụ chuyển văn bản thành giọng nói chuyên nghiệp</p>
259
- </div>
260
- """)
261
-
262
- # Main Description
263
- gr.Markdown("""
264
- <div class="description">
265
- Công cụ chuyển văn bản thành giọng nói với nhiều tùy chọn ngôn ngữ và giọng đọc tự nhiên.
266
- Không giới hạn ký tự & cho phép tải file srt.
267
- </div>
268
- """)
269
-
270
- # Input Section
271
- with gr.Column(elem_classes="input-section"):
272
- # Engine Selection
273
- engine = gr.Radio(
274
- ["Edge", "Google"],
275
- label="Chọn Engine TTS",
276
- value="Edge",
277
- info="Edge TTS hỗ trợ nhiều giọng đọc hơn, Google TTS đơn giản và nhanh"
278
- )
279
-
280
- # Text Input
281
- text_input = gr.Textbox(
282
- label="Nhập văn bản cần chuyển đổi",
283
- lines=8,
284
- placeholder="Nhập hoặc dán văn bản của bạn vào đây...",
285
- elem_id="text-input"
286
- )
287
-
288
- # Voice Configuration
289
- with gr.Row():
290
- with gr.Column():
291
- lang_dd = gr.Dropdown(
292
- label="Ngôn ngữ",
293
- choices=languages,
294
- value=DEFAULT_LANG,
295
- info="Chọn ngôn ngữ phù hợp với văn bản"
296
- )
297
- with gr.Column():
298
- gender_dd = gr.Dropdown(
299
- label="Giới tính giọng đọc",
300
- choices=get_genders(DEFAULT_LANG),
301
- value=get_genders(DEFAULT_LANG)[0],
302
- interactive=True
303
- )
304
- with gr.Column():
305
- voice_dd = gr.Dropdown(
306
- label="Chọn giọng đọc",
307
- choices=get_edge_voices(DEFAULT_LANG, get_genders(DEFAULT_LANG)[0]),
308
- value=get_edge_voices(DEFAULT_LANG, get_genders(DEFAULT_LANG)[0])[0],
309
- info="Chọn giọng đọc yêu thích của bạn"
310
- )
311
-
312
- # Voice Adjustment
313
- with gr.Row():
314
- with gr.Column():
315
- rate_sl = gr.Slider(
316
- -50, 50,
317
- value=0,
318
- step=1,
319
- label="Điều chỉnh tốc độ (%)",
320
- info="Tăng/giảm tốc độ giọng đọc"
321
- )
322
- with gr.Column():
323
- pitch_sl = gr.Slider(
324
- -50, 50,
325
- value=0,
326
- step=1,
327
- label="Điều chỉnh cao độ (Hz)",
328
- info="Tăng/giảm độ cao của giọng nói"
329
- )
330
-
331
- # Action Buttons
332
- with gr.Row():
333
- gen_btn = gr.Button("Tạo giọng nói", variant="primary")
334
- preview_btn = gr.Button("Nghe thử giọng", variant="secondary", elem_classes="preview-btn")
335
-
336
- # Output Section
337
- with gr.Column(elem_classes="output-section"):
338
- with gr.Tabs():
339
- with gr.TabItem("Kết quả chính"):
340
- out_audio = gr.Audio(
341
- label="Âm thanh đầu ra (MP3)",
342
- type="filepath",
343
- autoplay=True,
344
- elem_id="output-audio"
345
- )
346
- out_srt = gr.File(
347
- label="File phụ đề (SRT)",
348
- file_types=[".srt"],
349
- elem_id="output-srt"
350
- )
351
-
352
- with gr.TabItem("Bản xem trước"):
353
- prev_audio = gr.Audio(
354
- label="Bản thử giọng",
355
- type="filepath",
356
- autoplay=True,
357
- elem_id="preview-audio"
358
- )
359
-
360
- # Footer
361
- gr.Markdown("""
362
- <footer>
363
- <p>Phát triển bởi <strong>Lý Trần</strong> | Cộng đồng LTTEAM</p>
364
- <p><a href="https://www.facebook.com/groups/622526090937760" target="_blank">Tham gia nhóm Facebook</a></p>
365
- </footer>
366
- """)
367
-
368
- # Update dropdowns
369
- lang_dd.change(on_language_change, inputs=[lang_dd], outputs=[gender_dd, voice_dd])
370
- gender_dd.change(on_gender_change, inputs=[lang_dd, gender_dd], outputs=[voice_dd])
371
-
372
- # Generate & SRT
373
- gen_btn.click(
374
- fn=generate_tts,
375
- inputs=[text_input, engine, lang_dd, gender_dd, voice_dd, rate_sl, pitch_sl],
376
- outputs=[out_audio, out_srt]
377
- )
378
- # Preview
379
- preview_btn.click(
380
- fn=preview_voice,
381
- inputs=[engine, lang_dd, gender_dd, voice_dd, rate_sl, pitch_sl],
382
- outputs=[prev_audio]
383
- )
384
-
385
- if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  demo.launch(server_name="0.0.0.0")
 
1
+ import json
2
+ import asyncio
3
+ import os
4
+ import re
5
+ import subprocess
6
+ from pathlib import Path
7
+
8
+ import gradio as gr
9
+ from edge_tts import Communicate
10
+ from gtts import gTTS
11
+ import requests
12
+ import base64
13
+ import wave
14
+
15
# Edge voice catalogue: parsed once at import time from the voices.json file
# that sits next to this module.
VOICES_FILE = Path(__file__).parent / "voices.json"
voices_data = json.loads(VOICES_FILE.read_text(encoding="utf-8"))

# The selectable languages are the top-level keys of the catalogue, sorted.
languages = sorted(voices_data.keys())
21
def get_genders(lang):
    """Return the sorted gender keys listed for ``lang`` in ``voices_data``.

    A language that is not in the catalogue yields an empty list.
    """
    per_language = voices_data.get(lang, {})
    return sorted(per_language.keys())
23
+
24
def get_edge_voices(lang, gender):
    """Return the display names of the voices listed for ``lang`` and ``gender``.

    An unknown language or gender yields an empty list.
    """
    display_names = []
    for voice in voices_data.get(lang, {}).get(gender, []):
        display_names.append(voice["display_name"])
    return display_names
26
+
27
# Lookup table from a voice's display name to its voice code, flattened
# across every language and gender in the catalogue. When two entries share
# a display name, the one encountered last wins.
edge_voice_map = dict(
    (voice["display_name"], voice["voice_code"])
    for language_block in voices_data.values()
    for voice_list in language_block.values()
    for voice in voice_list
)
33
+
34
# Voice names offered for the Gemini engine (hard-coded list).
GEMINI_VOICES = [
    "Zephyr", "Puck", "Charon", "Kore", "Fenrir",
    "Leda", "Orus", "Aoede", "Callirrhoe", "Autonoe",
    "Enceladus", "Iapetus", "Umbriel", "Algieba", "Despina",
    "Erinome", "Algenib", "Rasalgethi", "Laomedeia", "Achernar",
    "Alnilam", "Schedar", "Gacrux", "Pulcherrima", "Achird",
    "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat",
]


def get_gemini_voices():
    """Return the module-level list of Gemini voice names (not a copy)."""
    return GEMINI_VOICES
44
+
45
MAX_CHARS = 5000  # Edge-TTS accepts roughly 5000 characters per request


def split_text(text, max_chars=MAX_CHARS):
    """Split ``text`` into segments of at most ``max_chars`` characters.

    The text is first cut after each punctuation mark in the pattern below;
    consecutive pieces are then packed greedily into a segment until adding
    the next piece would exceed ``max_chars``. A single piece longer than
    ``max_chars`` is sliced into fixed-size chunks.

    Segments that contain only whitespace are dropped, because they carry
    nothing to synthesize.

    Args:
        text: The text to split.
        max_chars: Maximum length of a segment; must be at least 1.

    Returns:
        The list of segments in their original order (empty for blank text).

    Raises:
        ValueError: If ``max_chars`` is smaller than 1. Without this check
            the slicing loop below would never terminate.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    segments = []

    def keep(chunk):
        # Skip empty or whitespace-only chunks.
        if chunk.strip():
            segments.append(chunk)

    buf = ""
    # The zero-width look-behind keeps each punctuation mark attached to
    # the piece that precedes it.
    for part in re.split(r'(?<=[。!?.!?,;:])', text):
        if len(buf) + len(part) <= max_chars:
            buf += part
            continue
        keep(buf)
        # An oversized piece is cut into max_chars-sized chunks; whatever
        # is left over starts the next buffer.
        while len(part) > max_chars:
            keep(part[:max_chars])
            part = part[max_chars:]
        buf = part
    keep(buf)
    return segments
63
+
64
def merge_audios(seg_paths, output_path):
    """Concatenate the segment files into ``output_path`` using ffmpeg.

    A list file for ffmpeg's ``concat`` demuxer is written, ffmpeg is run
    with stream copy (``-c copy``), and the segment files are deleted once
    the merge has succeeded.

    Args:
        seg_paths: Paths of the audio segments, in playback order.
        output_path: Path of the merged file (overwritten if present).

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status. The segment files are left in place in that case.
    """
    import tempfile  # local import so the block does not depend on file-level imports

    # A uniquely named list file keeps concurrent calls from overwriting
    # each other's lists.
    with tempfile.NamedTemporaryFile(
        "w", encoding="utf-8", suffix=".txt", delete=False
    ) as list_handle:
        list_file = list_handle.name
        for p in seg_paths:
            # Inside a single-quoted string a literal quote is written '\''.
            escaped = os.path.abspath(p).replace("'", "'\\''")
            list_handle.write(f"file '{escaped}'\n")

    try:
        subprocess.run([
            "ffmpeg", "-y", "-f", "concat", "-safe", "0",
            "-i", list_file, "-c", "copy", output_path
        ], check=True)
    finally:
        # The list file is removed even when ffmpeg fails.
        os.remove(list_file)

    for p in seg_paths:
        os.remove(p)
76
+
77
def get_audio_duration(path):
    """Return the duration in seconds of the media file at ``path``.

    ffprobe is asked for the ``format=duration`` entry only, printed without
    wrappers or key, and its output is parsed as a float.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
        ValueError: If ffprobe's output cannot be parsed as a float.
    """
    probe_cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        path,
    ]
    raw_output = subprocess.check_output(probe_cmd)
    return float(raw_output.strip())
85
+
86
async def _edge_tts(text, voice_code, rate, pitch, out_path):
    """Synthesize ``text`` with Edge-TTS and save the audio to ``out_path``.

    ``rate`` and ``pitch`` are passed to ``Communicate`` unchanged.
    """
    await Communicate(
        text=text,
        voice=voice_code,
        rate=rate,
        pitch=pitch,
    ).save(out_path)
89
+
90
def run_edge_tts(text, voice_name, rate_slider, pitch_slider, out_path):
    """Synthesize ``text`` with Edge-TTS and write the result to ``out_path``.

    Args:
        text: Text to speak.
        voice_name: Display name of the voice; translated to a voice code
            through ``edge_voice_map``.
        rate_slider: Speed adjustment, formatted as a signed percentage
            (e.g. ``+10%``).
        pitch_slider: Pitch adjustment, formatted as signed hertz
            (e.g. ``-5Hz``).
        out_path: Destination file; an existing file is removed first.

    Raises:
        KeyError: If ``voice_name`` is not a key of ``edge_voice_map``.
    """
    # The values may arrive as floats (e.g. 10.0), which the "d" format
    # code rejects, so they are rounded to integers before formatting.
    rate = f"{int(round(rate_slider)):+d}%"
    pitch = f"{int(round(pitch_slider)):+d}Hz"
    voice_code = edge_voice_map[voice_name]
    if os.path.exists(out_path):
        os.remove(out_path)
    asyncio.run(_edge_tts(text, voice_code, rate, pitch, out_path))
97
+
98
def run_google_tts(text, lang_code, out_path):
    """Synthesize ``text`` with gTTS in ``lang_code`` and save it to ``out_path``."""
    gTTS(text=text, lang=lang_code).save(out_path)
101
+
102
def run_gemini_tts(text, model, api_key, voice_name, out_path):
    """Synthesize ``text`` through the Gemini ``generateContent`` REST endpoint.

    The request asks for an audio response using the prebuilt voice
    ``voice_name``. The base64 payload of the first part of the first
    candidate is decoded, wrapped in a WAV container and converted to
    ``out_path`` with ffmpeg.

    Args:
        text: Text to speak.
        model: Model name inserted into the endpoint URL.
        api_key: API key sent in the ``X-goog-api-key`` header.
        voice_name: Name of the prebuilt voice.
        out_path: Destination audio file (overwritten if present).

    Raises:
        requests.RequestException: On connection problems, a timeout or a
            non-2xx HTTP status.
        RuntimeError: If the response does not contain audio data where it
            is expected.
        subprocess.CalledProcessError: If the ffmpeg conversion fails.
    """
    API_URL_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
    url = API_URL_TEMPLATE.format(model=model)
    headers = {
        "Content-Type": "application/json",
        "X-goog-api-key": api_key
    }
    payload = {
        "contents": [
            {"parts": [{"text": text}]}
        ],
        "generationConfig": {
            "responseModalities": ["AUDIO"],
            "speechConfig": {
                "voiceConfig": {
                    "prebuiltVoiceConfig": {
                        "voiceName": voice_name
                    }
                }
            }
        }
    }
    # (connect, read) timeouts: without them a stalled connection would
    # block the request indefinitely.
    resp = requests.post(url, headers=headers, json=payload, timeout=(10, 300))
    resp.raise_for_status()
    try:
        b64 = resp.json()["candidates"][0]["content"]["parts"][0]["inlineData"]["data"]
    except (KeyError, IndexError, TypeError, ValueError) as err:
        raise RuntimeError("Gemini TTS response did not contain audio data") from err
    pcm_data = base64.b64decode(b64)

    # Derive the intermediate name from the stem so that it can never be
    # the same path as out_path, whatever extension out_path has.
    wav_path = os.path.splitext(out_path)[0] + ".tmp.wav"
    try:
        with wave.open(wav_path, "wb") as wf:
            # NOTE(review): mono / 16-bit / 24000 Hz are fixed here and are
            # assumed to match the PCM the API returns; confirm.
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(24000)
            wf.writeframes(pcm_data)
        subprocess.run(['ffmpeg', '-y', '-i', wav_path, out_path], check=True)
    finally:
        # The intermediate WAV is removed even when the conversion fails.
        if os.path.exists(wav_path):
            os.remove(wav_path)
136
+
137
def build_srt(segments, durations, srt_path):
    """Write an SRT subtitle file with one cue per segment.

    Cues are laid end to end: each cue starts where the previous one ended
    and lasts for its own duration. Cue text is the segment with leading
    and trailing whitespace removed.

    Args:
        segments: Text of each cue, in order.
        durations: Duration in seconds of each cue, parallel to ``segments``.
            Pairing stops at the shorter of the two sequences.
        srt_path: Path of the file to write (UTF-8).
    """
    def fmt(ts):
        # Work in whole milliseconds. Truncating the fractional part of a
        # float (e.g. 1.001 -> 0.000999...) would drop a millisecond.
        total_ms = int(round(ts * 1000))
        hours, rest = divmod(total_ms, 3_600_000)
        minutes, rest = divmod(rest, 60_000)
        seconds, millis = divmod(rest, 1000)
        return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"

    with open(srt_path, "w", encoding="utf-8") as f:
        cum = 0.0
        for i, (seg, dur) in enumerate(zip(segments, durations), start=1):
            start = fmt(cum)
            end = fmt(cum + dur)
            f.write(f"{i}\n{start} --> {end}\n{seg.strip()}\n\n")
            cum += dur
151
+
152
def generate_tts(text, engine, lang, gender, voice_name, rate, pitch, api_key, gemini_model, gemini_voice):
    """Convert ``text`` to one MP3 file plus a matching SRT subtitle file.

    The text is split into segments, each segment is synthesized with the
    selected engine and measured, the segments are merged into one file and
    an SRT file is built from the segment texts and durations.

    All files are created in a fresh temporary directory so that
    simultaneous requests cannot overwrite each other's output. The
    directory is not deleted here because the returned paths point into it.

    Args:
        text: Text to convert.
        engine: ``"Edge"``, ``"Google"`` or ``"Google API"``.
        lang: Language label; the Google engine uses ``"vi"`` when the
            label contains ``"Việt"`` and ``"en"`` otherwise.
        gender: Not used by this function.
        voice_name: Edge voice display name (Edge engine only).
        rate: Speed adjustment (Edge engine only).
        pitch: Pitch adjustment (Edge engine only).
        api_key: Gemini API key (``"Google API"`` engine only).
        gemini_model: Gemini model name (``"Google API"`` engine only).
        gemini_voice: Gemini voice name (``"Google API"`` engine only).

    Returns:
        ``(audio_path, srt_path)``, or ``(None, None)`` when ``text`` is
        empty or blank.

    Raises:
        gr.Error: If the engine is unknown, or the Gemini engine is selected
            without an API key.
    """
    import tempfile  # local import so the block does not depend on file-level imports

    if not text or not text.strip():
        return None, None
    # Validate up front: an unknown engine would otherwise only fail later,
    # when the duration of a file that was never written is probed.
    if engine not in ("Edge", "Google", "Google API"):
        raise gr.Error(f"Unsupported TTS engine: {engine}")
    if engine == "Google API" and not api_key:
        raise gr.Error("Bạn phải nhập API key Google Gemini TTS!")

    # A missing language is treated as "not Vietnamese".
    lang_code = "vi" if "Việt" in (lang or "") else "en"
    work_dir = tempfile.mkdtemp(prefix="lytrantts_")

    segments = split_text(text)
    seg_files, durations = [], []
    for idx, seg in enumerate(segments):
        seg_path = os.path.join(work_dir, f"seg_{idx}.mp3")
        if engine == "Edge":
            run_edge_tts(seg, voice_name, rate, pitch, seg_path)
        elif engine == "Google":
            run_google_tts(seg, lang_code, seg_path)
        else:
            run_gemini_tts(seg, gemini_model, api_key, gemini_voice, seg_path)
        seg_files.append(seg_path)
        durations.append(get_audio_duration(seg_path))

    out_audio = os.path.join(work_dir, "LyTranTTS.mp3")
    merge_audios(seg_files, out_audio)
    srt_file = os.path.join(work_dir, "LyTranTTS.srt")
    build_srt(segments, durations, srt_file)
    return out_audio, srt_file
175
+
176
def preview_voice(engine, lang, gender, voice_name, rate, pitch, api_key, gemini_model, gemini_voice):
    """Synthesize a short sample sentence with the current settings.

    The sample is Vietnamese when ``lang`` contains ``"Việt"`` and English
    otherwise. It is written to a uniquely named temporary MP3, so a preview
    never overwrites the main output file or another user's preview.

    Args:
        engine: ``"Edge"``, ``"Google"`` or ``"Google API"``.
        lang: Language label used to pick the sample sentence and, for the
            Google engine, the language code.
        gender: Not used by this function.
        voice_name: Edge voice display name (Edge engine only).
        rate: Speed adjustment (Edge engine only).
        pitch: Pitch adjustment (Edge engine only).
        api_key: Gemini API key (``"Google API"`` engine only).
        gemini_model: Gemini model name (``"Google API"`` engine only).
        gemini_voice: Gemini voice name (``"Google API"`` engine only).

    Returns:
        Path of the generated preview MP3.

    Raises:
        gr.Error: If the engine is unknown, or the Gemini engine is selected
            without an API key.
    """
    import tempfile  # local import so the block does not depend on file-level imports

    # A missing language is treated as "not Vietnamese".
    is_vietnamese = "Việt" in (lang or "")
    if is_vietnamese:
        text = "Xin chào! Đây là ứng dụng chuyển văn bản thành giọng, nói được phát triển bởi Lý Trần."
    else:
        text = "Hello, This is a Text to Speech, Speech App developed by Ly Tran"

    if engine not in ("Edge", "Google", "Google API"):
        raise gr.Error(f"Unsupported TTS engine: {engine}")
    if engine == "Google API" and not api_key:
        raise gr.Error("Bạn phải nhập API key Google Gemini TTS!")

    # mkstemp only reserves a unique name; the engines write the file
    # themselves, so the descriptor is closed straight away.
    fd, out = tempfile.mkstemp(prefix="lytrantts_preview_", suffix=".mp3")
    os.close(fd)

    if engine == "Edge":
        run_edge_tts(text, voice_name, rate, pitch, out)
    elif engine == "Google":
        lang_code = "vi" if is_vietnamese else "en"
        run_google_tts(text, lang_code, out)
    else:
        run_gemini_tts(text, gemini_model, api_key, gemini_voice, out)
    return out
189
+
190
def on_engine_change(engine):
    """Return visibility updates for the engine-specific input fields.

    The Gemini fields are shown only for the ``"Google API"`` engine; the
    Edge voice dropdown is shown for every other engine.

    Returns:
        Four updates, in this order: API key field, Gemini model field,
        Edge voice dropdown, Gemini voice dropdown.
    """
    gemini_selected = engine == "Google API"
    api_key_update = gr.update(visible=gemini_selected)
    model_update = gr.update(visible=gemini_selected)
    edge_voice_update = gr.update(visible=not gemini_selected)
    gemini_voice_update = gr.update(visible=gemini_selected)
    return api_key_update, model_update, edge_voice_update, gemini_voice_update
199
+
200
def on_language_change(lang):
    """Return updates for the gender and voice dropdowns after a language change.

    The first gender listed for ``lang`` becomes the selection, and the
    first voice of that gender becomes the selected voice. When there is
    nothing to choose from, the selection is ``None``.

    Returns:
        A pair of updates: gender dropdown, then voice dropdown.
    """
    genders = get_genders(lang)
    default_gender = genders[0] if genders else None

    if default_gender:
        voices = get_edge_voices(lang, default_gender)
    else:
        voices = []
    default_voice = voices[0] if voices else None

    gender_update = gr.update(choices=genders, value=default_gender)
    voice_update = gr.update(choices=voices, value=default_voice)
    return gender_update, voice_update
209
+
210
def on_gender_change(lang, gender):
    """Return an update for the voice dropdown after a gender change.

    The choices become the voices listed for ``lang`` and ``gender``; the
    first one is selected, or ``None`` when there are none.
    """
    voices = get_edge_voices(lang, gender)
    if voices:
        return gr.update(choices=voices, value=voices[0])
    return gr.update(choices=voices, value=None)
214
+
215
+ DEFAULT_LANG = "Đa Ngôn Ngữ"
216
+ DEFAULT_MODEL = "gemini-2.5-flash-preview-tts"
217
+
218
+ custom_css = """
219
+ :root {
220
+ --primary: #4f46e5;
221
+ --secondary: #f9fafb;
222
+ --accent: #10b981;
223
+ --text: #1f2937;
224
+ --border: #e5e7eb;
225
+ }
226
+ body {
227
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
228
+ }
229
+ .gradio-container {
230
+ max-width: 900px !important;
231
+ margin: 0 auto;
232
+ background-color: white;
233
+ border-radius: 12px;
234
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
235
+ }
236
+ h1 {
237
+ color: var(--primary) !important;
238
+ font-weight: 700 !important;
239
+ margin-bottom: 0.5rem !important;
240
+ }
241
+ .description {
242
+ color: var(--text) !important;
243
+ margin-bottom: 1.5rem !important;
244
+ font-size: 1rem !important;
245
+ }
246
+ .input-section, .output-section {
247
+ background-color: var(--secondary);
248
+ padding: 1.5rem;
249
+ border-radius: 8px;
250
+ margin-bottom: 1.5rem;
251
+ border: 1px solid var(--border);
252
+ }
253
+ .input-section label, .output-section label {
254
+ font-weight: 600 !important;
255
+ color: var(--text) !important;
256
+ }
257
+ .tab {
258
+ background-color: white !important;
259
+ border-radius: 8px !important;
260
+ padding: 1rem !important;
261
+ }
262
+ button {
263
+ background-color: var(--primary) !important;
264
+ color: white !important;
265
+ border: none !important;
266
+ border-radius: 6px !important;
267
+ padding: 0.5rem 1rem !important;
268
+ font-weight: 500 !important;
269
+ transition: all 0.2s !important;
270
+ }
271
+ button:hover {
272
+ background-color: #4338ca !important;
273
+ transform: translateY(-1px) !important;
274
+ }
275
+ .preview-btn {
276
+ background-color: var(--accent) !important;
277
+ }
278
+ .preview-btn:hover {
279
+ background-color: #0d9b6e !important;
280
+ }
281
+ .slider-container {
282
+ margin-top: 1rem;
283
+ }
284
+ .slider-container label {
285
+ margin-bottom: 0.5rem;
286
+ display: block;
287
+ }
288
+ footer {
289
+ text-align: center;
290
+ margin-top: 2rem;
291
+ color: #6b7280;
292
+ font-size: 0.875rem;
293
+ }
294
+ """
295
+
296
+ with gr.Blocks(title="LyTranTTS - Phiên bản miễn phí", css=custom_css) as demo:
297
+ with gr.Row():
298
+ gr.Markdown("""
299
+ <div style="text-align: center; width: 100%;">
300
+ <h1 style="margin-bottom: 0;">LyTranTTS - Phiên bản miễn phí</h1>
301
+ <p style="color: #6b7280; margin-top: 0.5rem;">Công cụ chuyển văn bản thành giọng nói chuyên nghiệp</p>
302
+ </div>
303
+ """)
304
+ gr.Markdown("""
305
+ <div class="description">
306
+ Công cụ chuyển văn bản thành giọng nói với nhiều tùy chọn ngôn ngữ và giọng đọc tự nhiên.
307
+ Không giới hạn ký tự & cho phép tải file srt.
308
+ </div>
309
+ """)
310
+ with gr.Column(elem_classes="input-section"):
311
+ engine = gr.Radio(
312
+ ["Edge", "Google", "Google API"],
313
+ label="Chọn Engine TTS",
314
+ value="Edge",
315
+ info="Edge TTS hỗ trợ nhiều giọng đọc hơn, Google TTS đơn giản và nhanh, Google API dùng API key Gemini"
316
+ )
317
+ text_input = gr.Textbox(
318
+ label="Nhập văn bản cần chuyển đổi",
319
+ lines=8,
320
+ placeholder="Nhập hoặc dán văn bản của bạn vào đây...",
321
+ elem_id="text-input"
322
+ )
323
+ with gr.Row():
324
+ with gr.Column():
325
+ lang_dd = gr.Dropdown(
326
+ label="Ngôn ngữ",
327
+ choices=languages,
328
+ value=DEFAULT_LANG,
329
+ info="Chọn ngôn ngữ phù hợp với văn bản"
330
+ )
331
+ with gr.Column():
332
+ gender_dd = gr.Dropdown(
333
+ label="Giới tính giọng đọc",
334
+ choices=get_genders(DEFAULT_LANG),
335
+ value=get_genders(DEFAULT_LANG)[0],
336
+ interactive=True
337
+ )
338
+ with gr.Column():
339
+ voice_dd = gr.Dropdown(
340
+ label="Chọn giọng đọc",
341
+ choices=get_edge_voices(DEFAULT_LANG, get_genders(DEFAULT_LANG)[0]),
342
+ value=get_edge_voices(DEFAULT_LANG, get_genders(DEFAULT_LANG)[0])[0],
343
+ info="Chọn giọng đọc yêu thích của bạn"
344
+ )
345
+ with gr.Column():
346
+ gemini_voice_dd = gr.Dropdown(
347
+ label="Giọng Gemini",
348
+ choices=get_gemini_voices(),
349
+ value=get_gemini_voices()[0],
350
+ visible=False
351
+ )
352
+ with gr.Row():
353
+ with gr.Column():
354
+ rate_sl = gr.Slider(
355
+ -50, 50,
356
+ value=0,
357
+ step=1,
358
+ label="Điều chỉnh tốc độ (%)",
359
+ info="Tăng/giảm tốc độ giọng đọc"
360
+ )
361
+ with gr.Column():
362
+ pitch_sl = gr.Slider(
363
+ -50, 50,
364
+ value=0,
365
+ step=1,
366
+ label="Điều chỉnh cao độ (Hz)",
367
+ info="Tăng/giảm độ cao của giọng nói"
368
+ )
369
+ with gr.Row():
370
+ api_key_input = gr.Textbox(
371
+ label="Google Gemini API key",
372
+ placeholder="Nhập API key Google Gemini của bạn...",
373
+ visible=False
374
+ )
375
+ gemini_model_input = gr.Textbox(
376
+ label="Model Gemini TTS",
377
+ value=DEFAULT_MODEL,
378
+ visible=False
379
+ )
380
+ with gr.Row():
381
+ gen_btn = gr.Button("Tạo giọng nói", variant="primary")
382
+ preview_btn = gr.Button("Nghe thử giọng", variant="secondary", elem_classes="preview-btn")
383
+ with gr.Column(elem_classes="output-section"):
384
+ with gr.Tabs():
385
+ with gr.TabItem("Kết quả chính"):
386
+ out_audio = gr.Audio(
387
+ label="Âm thanh đầu ra (MP3)",
388
+ type="filepath",
389
+ autoplay=True,
390
+ elem_id="output-audio"
391
+ )
392
+ out_srt = gr.File(
393
+ label="File phụ đề (SRT)",
394
+ file_types=[".srt"],
395
+ elem_id="output-srt"
396
+ )
397
+ with gr.TabItem("Bản xem trước"):
398
+ prev_audio = gr.Audio(
399
+ label="Bản thử giọng",
400
+ type="filepath",
401
+ autoplay=True,
402
+ elem_id="preview-audio"
403
+ )
404
+ gr.Markdown("""
405
+ <footer>
406
+ <p>Phát triển bởi <strong>Lý Trần</strong> | Cộng đồng LTTEAM</p>
407
+ <p><a href="https://www.facebook.com/groups/622526090937760" target="_blank">Tham gia nhóm Facebook</a></p>
408
+ </footer>
409
+ """)
410
+ # Update dropdowns khi đổi engine
411
+ engine.change(on_engine_change, inputs=[engine], outputs=[api_key_input, gemini_model_input, voice_dd, gemini_voice_dd])
412
+ lang_dd.change(on_language_change, inputs=[lang_dd], outputs=[gender_dd, voice_dd])
413
+ gender_dd.change(on_gender_change, inputs=[lang_dd, gender_dd], outputs=[voice_dd])
414
+ # Generate
415
+ gen_btn.click(
416
+ fn=generate_tts,
417
+ inputs=[text_input, engine, lang_dd, gender_dd, voice_dd, rate_sl, pitch_sl, api_key_input, gemini_model_input, gemini_voice_dd],
418
+ outputs=[out_audio, out_srt]
419
+ )
420
+ preview_btn.click(
421
+ fn=preview_voice,
422
+ inputs=[engine, lang_dd, gender_dd, voice_dd, rate_sl, pitch_sl, api_key_input, gemini_model_input, gemini_voice_dd],
423
+ outputs=[prev_audio]
424
+ )
425
+
426
+ if __name__ == "__main__":
427
  demo.launch(server_name="0.0.0.0")