LTTEAM commited on
Commit
f4a73b2
·
verified ·
1 Parent(s): 03a81a9

Delete fix

Browse files
Files changed (1) hide show
  1. fix +0 -427
fix DELETED
@@ -1,427 +0,0 @@
1
- import json
2
- import asyncio
3
- import os
4
- import re
5
- import subprocess
6
- from pathlib import Path
7
-
8
- import gradio as gr
9
- from edge_tts import Communicate
10
- from gtts import gTTS
11
- import requests
12
- import base64
13
- import wave
14
-
15
- # EDGE voices
16
- VOICES_FILE = Path(__file__).parent / "voices.json"
17
- with open(VOICES_FILE, encoding="utf-8") as f:
18
- voices_data = json.load(f)
19
-
20
- languages = sorted(voices_data.keys())
21
- def get_genders(lang):
22
- return sorted(voices_data.get(lang, {}).keys())
23
-
24
- def get_edge_voices(lang, gender):
25
- return [v["display_name"] for v in voices_data.get(lang, {}).get(gender, [])]
26
-
27
- edge_voice_map = {
28
- v["display_name"]: v["voice_code"]
29
- for block in voices_data.values()
30
- for gender_block in block.values()
31
- for v in gender_block
32
- }
33
-
34
- # Gemini voices (hardcoded)
35
- GEMINI_VOICES = [
36
- "Zephyr", "Puck", "Charon", "Kore", "Fenrir", "Leda", "Orus", "Aoede",
37
- "Callirrhoe", "Autonoe", "Enceladus", "Iapetus", "Umbriel", "Algieba",
38
- "Despina", "Erinome", "Algenib", "Rasalgethi", "Laomedeia", "Achernar",
39
- "Alnilam", "Schedar", "Gacrux", "Pulcherrima", "Achird",
40
- "Zubenelgenubi", "Vindemiatrix", "Sadachbia", "Sadaltager", "Sulafat"
41
- ]
42
- def get_gemini_voices():
43
- return GEMINI_VOICES
44
-
45
- MAX_CHARS = 5000 # Edge-TTS giới hạn ~5000 ký tự
46
-
47
- def split_text(text, max_chars=MAX_CHARS):
48
- parts = re.split(r'(?<=[。!?.!?,;:])', text)
49
- segments, buf = [], ""
50
- for part in parts:
51
- if len(buf) + len(part) <= max_chars:
52
- buf += part
53
- else:
54
- if buf:
55
- segments.append(buf)
56
- while len(part) > max_chars:
57
- segments.append(part[:max_chars])
58
- part = part[max_chars:]
59
- buf = part
60
- if buf:
61
- segments.append(buf)
62
- return segments
63
-
64
- def merge_audios(seg_paths, output_path):
65
- list_file = "concat_list.txt"
66
- with open(list_file, "w", encoding="utf-8") as f:
67
- for p in seg_paths:
68
- f.write(f"file '{os.path.abspath(p)}'\n")
69
- subprocess.run([
70
- "ffmpeg", "-y", "-f", "concat", "-safe", "0",
71
- "-i", list_file, "-c", "copy", output_path
72
- ], check=True)
73
- os.remove(list_file)
74
- for p in seg_paths:
75
- os.remove(p)
76
-
77
- def get_audio_duration(path):
78
- out = subprocess.check_output([
79
- "ffprobe", "-v", "error",
80
- "-show_entries", "format=duration",
81
- "-of", "default=noprint_wrappers=1:nokey=1",
82
- path
83
- ])
84
- return float(out.strip())
85
-
86
- async def _edge_tts(text, voice_code, rate, pitch, out_path):
87
- tts = Communicate(text=text, voice=voice_code, rate=rate, pitch=pitch)
88
- await tts.save(out_path)
89
-
90
- def run_edge_tts(text, voice_name, rate_slider, pitch_slider, out_path):
91
- rate = f"{rate_slider:+d}%"
92
- pitch = f"{pitch_slider:+d}Hz"
93
- voice_code = edge_voice_map[voice_name]
94
- if os.path.exists(out_path):
95
- os.remove(out_path)
96
- asyncio.run(_edge_tts(text, voice_code, rate, pitch, out_path))
97
-
98
- def run_google_tts(text, lang_code, out_path):
99
- tts = gTTS(text=text, lang=lang_code)
100
- tts.save(out_path)
101
-
102
- def run_gemini_tts(text, model, api_key, voice_name, out_path):
103
- API_URL_TEMPLATE = "https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
104
- url = API_URL_TEMPLATE.format(model=model)
105
- headers = {
106
- "Content-Type": "application/json",
107
- "X-goog-api-key": api_key
108
- }
109
- payload = {
110
- "contents": [
111
- { "parts": [ { "text": text } ] }
112
- ],
113
- "generationConfig": {
114
- "responseModalities": ["AUDIO"],
115
- "speechConfig": {
116
- "voiceConfig": {
117
- "prebuiltVoiceConfig": {
118
- "voiceName": voice_name
119
- }
120
- }
121
- }
122
- }
123
- }
124
- resp = requests.post(url, headers=headers, json=payload)
125
- resp.raise_for_status()
126
- b64 = resp.json()["candidates"][0]["content"]["parts"][0]["inlineData"]["data"]
127
- pcm_data = base64.b64decode(b64)
128
- wav_path = out_path.replace('.mp3', '.wav')
129
- with wave.open(wav_path, "wb") as wf:
130
- wf.setnchannels(1)
131
- wf.setsampwidth(2)
132
- wf.setframerate(24000)
133
- wf.writeframes(pcm_data)
134
- subprocess.run(['ffmpeg', '-y', '-i', wav_path, out_path], check=True)
135
- os.remove(wav_path)
136
-
137
- def build_srt(segments, durations, srt_path):
138
- def fmt(ts):
139
- h = int(ts // 3600)
140
- m = int((ts%3600)//60)
141
- s = int(ts%60)
142
- ms = int((ts - int(ts))*1000)
143
- return f"{h:02}:{m:02}:{s:02},{ms:03}"
144
- with open(srt_path, "w", encoding="utf-8") as f:
145
- cum = 0.0
146
- for i, (seg, dur) in enumerate(zip(segments, durations), start=1):
147
- start = fmt(cum)
148
- end = fmt(cum + dur)
149
- f.write(f"{i}\n{start} --> {end}\n{seg.strip()}\n\n")
150
- cum += dur
151
-
152
- def generate_tts(text, engine, lang, gender, voice_name, rate, pitch, api_key, gemini_model, gemini_voice):
153
- if not text.strip():
154
- return None, None
155
- segments = split_text(text)
156
- seg_files, durations = [], []
157
- for idx, seg in enumerate(segments):
158
- seg_path = f"seg_{idx}.mp3"
159
- if engine == "Edge":
160
- run_edge_tts(seg, voice_name, rate, pitch, seg_path)
161
- elif engine == "Google":
162
- lang_code = "vi" if "Việt" in lang else "en"
163
- run_google_tts(seg, lang_code, seg_path)
164
- elif engine == "Google API":
165
- if not api_key:
166
- raise Exception("Bạn phải nhập API key Google Gemini TTS!")
167
- run_gemini_tts(seg, gemini_model, api_key, gemini_voice, seg_path)
168
- seg_files.append(seg_path)
169
- durations.append(get_audio_duration(seg_path))
170
- out_audio = "LyTranTTS.mp3"
171
- merge_audios(seg_files, out_audio)
172
- srt_file = "LyTranTTS.srt"
173
- build_srt(segments, durations, srt_file)
174
- return out_audio, srt_file
175
-
176
- def preview_voice(engine, lang, gender, voice_name, rate, pitch, api_key, gemini_model, gemini_voice):
177
- text = "Xin chào! Đây là ứng dụng chuyển văn bản thành giọng, nói được phát triển bởi Lý Trần." if "Việt" in lang else "Hello, This is a Text to Speech, Speech App developed by Ly Tran"
178
- out = "LyTranTTS.mp3"
179
- if engine == "Edge":
180
- run_edge_tts(text, voice_name, rate, pitch, out)
181
- elif engine == "Google":
182
- lang_code = "vi" if "Việt" in lang else "en"
183
- run_google_tts(text, lang_code, out)
184
- elif engine == "Google API":
185
- if not api_key:
186
- raise Exception("Bạn phải nhập API key Google Gemini TTS!")
187
- run_gemini_tts(text, gemini_model, api_key, gemini_voice, out)
188
- return out
189
-
190
- def on_engine_change(engine):
191
- # Show/hide Gemini fields when engine changes
192
- show_gemini = engine == "Google API"
193
- return (
194
- gr.update(visible=show_gemini), # api_key_input
195
- gr.update(visible=show_gemini), # gemini_model_input
196
- gr.update(visible=not show_gemini), # voice_dd (Edge voice)
197
- gr.update(visible=show_gemini), # gemini_voice_dd
198
- )
199
-
200
- def on_language_change(lang):
201
- genders = get_genders(lang)
202
- default_gender = genders[0] if genders else None
203
- voices = get_edge_voices(lang, default_gender) if default_gender else []
204
- default_voice = voices[0] if voices else None
205
- return (
206
- gr.update(choices=genders, value=default_gender),
207
- gr.update(choices=voices, value=default_voice),
208
- )
209
-
210
- def on_gender_change(lang, gender):
211
- voices = get_edge_voices(lang, gender)
212
- default_voice = voices[0] if voices else None
213
- return gr.update(choices=voices, value=default_voice)
214
-
215
- DEFAULT_LANG = "Đa Ngôn Ngữ"
216
- DEFAULT_MODEL = "gemini-2.5-flash-preview-tts"
217
-
218
- custom_css = """
219
- :root {
220
- --primary: #4f46e5;
221
- --secondary: #f9fafb;
222
- --accent: #10b981;
223
- --text: #1f2937;
224
- --border: #e5e7eb;
225
- }
226
- body {
227
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
228
- }
229
- .gradio-container {
230
- max-width: 900px !important;
231
- margin: 0 auto;
232
- background-color: white;
233
- border-radius: 12px;
234
- box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
235
- }
236
- h1 {
237
- color: var(--primary) !important;
238
- font-weight: 700 !important;
239
- margin-bottom: 0.5rem !important;
240
- }
241
- .description {
242
- color: var(--text) !important;
243
- margin-bottom: 1.5rem !important;
244
- font-size: 1rem !important;
245
- }
246
- .input-section, .output-section {
247
- background-color: var(--secondary);
248
- padding: 1.5rem;
249
- border-radius: 8px;
250
- margin-bottom: 1.5rem;
251
- border: 1px solid var(--border);
252
- }
253
- .input-section label, .output-section label {
254
- font-weight: 600 !important;
255
- color: var(--text) !important;
256
- }
257
- .tab {
258
- background-color: white !important;
259
- border-radius: 8px !important;
260
- padding: 1rem !important;
261
- }
262
- button {
263
- background-color: var(--primary) !important;
264
- color: white !important;
265
- border: none !important;
266
- border-radius: 6px !important;
267
- padding: 0.5rem 1rem !important;
268
- font-weight: 500 !important;
269
- transition: all 0.2s !important;
270
- }
271
- button:hover {
272
- background-color: #4338ca !important;
273
- transform: translateY(-1px) !important;
274
- }
275
- .preview-btn {
276
- background-color: var(--accent) !important;
277
- }
278
- .preview-btn:hover {
279
- background-color: #0d9b6e !important;
280
- }
281
- .slider-container {
282
- margin-top: 1rem;
283
- }
284
- .slider-container label {
285
- margin-bottom: 0.5rem;
286
- display: block;
287
- }
288
- footer {
289
- text-align: center;
290
- margin-top: 2rem;
291
- color: #6b7280;
292
- font-size: 0.875rem;
293
- }
294
- """
295
-
296
- with gr.Blocks(title="LyTranTTS - Phiên bản miễn phí", css=custom_css) as demo:
297
- with gr.Row():
298
- gr.Markdown("""
299
- <div style="text-align: center; width: 100%;">
300
- <h1 style="margin-bottom: 0;">LyTranTTS - Phiên bản miễn phí</h1>
301
- <p style="color: #6b7280; margin-top: 0.5rem;">Công cụ chuyển văn bản thành giọng nói chuyên nghiệp</p>
302
- </div>
303
- """)
304
- gr.Markdown("""
305
- <div class="description">
306
- Công cụ chuyển văn bản thành giọng nói với nhiều tùy chọn ngôn ngữ và giọng đọc tự nhiên.
307
- Không giới hạn ký tự & cho phép tải file srt.
308
- </div>
309
- """)
310
- with gr.Column(elem_classes="input-section"):
311
- engine = gr.Radio(
312
- ["Edge", "Google", "Google API"],
313
- label="Chọn Engine TTS",
314
- value="Edge",
315
- info="Edge TTS hỗ trợ nhiều giọng đọc hơn, Google TTS đơn giản và nhanh, Google API dùng API key Gemini"
316
- )
317
- text_input = gr.Textbox(
318
- label="Nhập văn bản cần chuyển đổi",
319
- lines=8,
320
- placeholder="Nhập hoặc dán văn bản của bạn vào đây...",
321
- elem_id="text-input"
322
- )
323
- with gr.Row():
324
- with gr.Column():
325
- lang_dd = gr.Dropdown(
326
- label="Ngôn ngữ",
327
- choices=languages,
328
- value=DEFAULT_LANG,
329
- info="Chọn ngôn ngữ phù hợp với văn bản"
330
- )
331
- with gr.Column():
332
- gender_dd = gr.Dropdown(
333
- label="Giới tính giọng đọc",
334
- choices=get_genders(DEFAULT_LANG),
335
- value=get_genders(DEFAULT_LANG)[0],
336
- interactive=True
337
- )
338
- with gr.Column():
339
- voice_dd = gr.Dropdown(
340
- label="Chọn giọng đọc",
341
- choices=get_edge_voices(DEFAULT_LANG, get_genders(DEFAULT_LANG)[0]),
342
- value=get_edge_voices(DEFAULT_LANG, get_genders(DEFAULT_LANG)[0])[0],
343
- info="Chọn giọng đọc yêu thích của bạn"
344
- )
345
- with gr.Column():
346
- gemini_voice_dd = gr.Dropdown(
347
- label="Giọng Gemini",
348
- choices=get_gemini_voices(),
349
- value=get_gemini_voices()[0],
350
- visible=False
351
- )
352
- with gr.Row():
353
- with gr.Column():
354
- rate_sl = gr.Slider(
355
- -50, 50,
356
- value=0,
357
- step=1,
358
- label="Điều chỉnh tốc độ (%)",
359
- info="Tăng/giảm tốc độ giọng đọc"
360
- )
361
- with gr.Column():
362
- pitch_sl = gr.Slider(
363
- -50, 50,
364
- value=0,
365
- step=1,
366
- label="Điều chỉnh cao độ (Hz)",
367
- info="Tăng/giảm độ cao của giọng nói"
368
- )
369
- with gr.Row():
370
- api_key_input = gr.Textbox(
371
- label="Google Gemini API key",
372
- placeholder="Nhập API key Google Gemini của bạn...",
373
- visible=False
374
- )
375
- gemini_model_input = gr.Textbox(
376
- label="Model Gemini TTS",
377
- value=DEFAULT_MODEL,
378
- visible=False
379
- )
380
- with gr.Row():
381
- gen_btn = gr.Button("Tạo giọng nói", variant="primary")
382
- preview_btn = gr.Button("Nghe thử giọng", variant="secondary", elem_classes="preview-btn")
383
- with gr.Column(elem_classes="output-section"):
384
- with gr.Tabs():
385
- with gr.TabItem("Kết quả chính"):
386
- out_audio = gr.Audio(
387
- label="Âm thanh đầu ra (MP3)",
388
- type="filepath",
389
- autoplay=True,
390
- elem_id="output-audio"
391
- )
392
- out_srt = gr.File(
393
- label="File phụ đề (SRT)",
394
- file_types=[".srt"],
395
- elem_id="output-srt"
396
- )
397
- with gr.TabItem("Bản xem trước"):
398
- prev_audio = gr.Audio(
399
- label="Bản thử giọng",
400
- type="filepath",
401
- autoplay=True,
402
- elem_id="preview-audio"
403
- )
404
- gr.Markdown("""
405
- <footer>
406
- <p>Phát triển bởi <strong>Lý Trần</strong> | Cộng đồng LTTEAM</p>
407
- <p><a href="https://www.facebook.com/groups/622526090937760" target="_blank">Tham gia nhóm Facebook</a></p>
408
- </footer>
409
- """)
410
- # Update dropdowns khi đổi engine
411
- engine.change(on_engine_change, inputs=[engine], outputs=[api_key_input, gemini_model_input, voice_dd, gemini_voice_dd])
412
- lang_dd.change(on_language_change, inputs=[lang_dd], outputs=[gender_dd, voice_dd])
413
- gender_dd.change(on_gender_change, inputs=[lang_dd, gender_dd], outputs=[voice_dd])
414
- # Generate
415
- gen_btn.click(
416
- fn=generate_tts,
417
- inputs=[text_input, engine, lang_dd, gender_dd, voice_dd, rate_sl, pitch_sl, api_key_input, gemini_model_input, gemini_voice_dd],
418
- outputs=[out_audio, out_srt]
419
- )
420
- preview_btn.click(
421
- fn=preview_voice,
422
- inputs=[engine, lang_dd, gender_dd, voice_dd, rate_sl, pitch_sl, api_key_input, gemini_model_input, gemini_voice_dd],
423
- outputs=[prev_audio]
424
- )
425
-
426
- if __name__ == "__main__":
427
- demo.launch(server_name="0.0.0.0")