Epchannel committed on
Commit
9b20cba
·
1 Parent(s): c1600ea

first commit

Browse files
Files changed (2) hide show
  1. app.py +111 -0
  2. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ from datetime import datetime
5
+ from vinorm import TTSnorm
6
+ from underthesea import sent_tokenize
7
+ from unidecode import unidecode
8
+ import soundfile as sf
9
+ from TTS.tts.configs.xtts_config import XttsConfig
10
+ from TTS.tts.models.xtts import Xtts
11
+ from huggingface_hub import snapshot_download
12
+ import os
13
+
14
# Download the model files on first run, when the checkpoint is not on disk yet.
if not os.path.exists("model/model.pth"):
    snapshot_download(
        repo_id="epchannel/EpXTTS",
        repo_type="model",
        local_dir="model",
    )
17
+
18
# Load the XTTS model (kept in memory after the first successful load).
_loaded_model = None


def load_model():
    """Return the XTTS model, loading it from ``model/`` on first use.

    The configuration is read from ``model/config.json`` and the weights from
    ``model/model.pth`` (with ``model/vocab.json`` as the vocabulary file).
    The model is moved to the GPU when CUDA is available.

    The loaded instance is cached at module level so that repeated calls do
    not re-read the checkpoint from disk every time.

    Returns:
        The initialised ``Xtts`` model instance.
    """
    global _loaded_model
    if _loaded_model is not None:
        return _loaded_model

    config = XttsConfig()
    config.load_json("model/config.json")
    model = Xtts.init_from_config(config)
    model.load_checkpoint(
        config,
        checkpoint_path="model/model.pth",
        vocab_path="model/vocab.json",
    )
    if torch.cuda.is_available():
        model.cuda()

    # Cache only after loading fully succeeded, so a failed load is retried.
    _loaded_model = model
    return model
27
+
28
# Normalise Vietnamese text.
def normalize_vietnamese_text(text):
    """Run *text* through ``TTSnorm`` and apply a fixed list of clean-ups.

    After ``TTSnorm`` returns, the literal substitutions below are applied
    one after another, in the order listed; each one operates on the output
    of the previous one.

    Args:
        text: The raw input text.

    Returns:
        The normalised text.
    """
    # (old, new) pairs; the order matters because the replacements chain.
    substitutions = (
        ("..", "."),
        ("!.", "!"),
        ("?.", "?"),
        (" .", "."),
        (" ,", ","),
        ('"', ""),
        ("'", ""),
        ("AI", "Ây Ai"),
        ("A.I", "Ây Ai"),
        ("anh/chị", "anh chị"),
    )
    normalized = TTSnorm(text, unknown=False, lower=False, rule=True)
    for old, new in substitutions:
        normalized = normalized.replace(old, new)
    return normalized
37
+
38
# Build the output file name.
def get_file_name(text, max_char=50):
    """Return a timestamped, filesystem-safe base name derived from *text*.

    Args:
        text: Source text; only its first ``max_char`` characters are used.
        max_char: Maximum number of characters of ``text`` to keep.

    Returns:
        A string of the form ``"<MMDDHHMMSS>_<slug>"``, where the slug is the
        lower-cased, ASCII-transliterated text with spaces turned into
        underscores and every other unsafe character replaced by ``_``.
    """
    import re  # local import: only this function needs it

    slug = unidecode(text[:max_char].lower().replace(" ", "_"))
    # The text originates from user input. Anything other than ASCII letters,
    # digits, "_" and "-" (path separators, newlines, punctuation, ...) is
    # replaced so the result is always usable as a single file name.
    slug = re.sub(r"[^A-Za-z0-9_-]+", "_", slug)
    timestamp = datetime.now().strftime("%m%d%H%M%S")
    return f"{timestamp}_{slug}"
43
+
44
# Synthesise speech.
def synthesize(text, voice_choice):
    """Synthesise *text* with the chosen voice and return the audio file path.

    The text is normalised (best effort), split into sentences, each sentence
    is synthesised separately, and the resulting waveforms are concatenated
    and written as one MP3 file.

    Args:
        text: The text to read aloud.
        voice_choice: Base name of the reference clip; it is resolved to
            ``model/samples/<voice_choice>.wav``.

    Returns:
        Path of the MP3 file written under ``./output/``.

    Raises:
        gr.Error: If ``text`` is empty or yields no sentence to synthesise.
    """
    import logging  # local import: only this function needs it

    # Reject empty input before doing any expensive model work.
    if not text or not text.strip():
        raise gr.Error("Vui lòng nhập văn bản cần chuyển thành giọng nói.")

    model = load_model()
    ref_audio = f"model/samples/{voice_choice}.wav"

    # Prepare the speaker conditioning from the reference clip.
    gpt_latent, speaker_embed = model.get_conditioning_latents(
        audio_path=ref_audio,
        gpt_cond_len=model.config.gpt_cond_len,
        max_ref_length=model.config.max_ref_len,
        sound_norm_refs=model.config.sound_norm_refs,
    )

    try:
        text = normalize_vietnamese_text(text)
    except Exception:
        # Deliberate best effort: fall back to the raw text, but leave a trace
        # instead of silently swallowing the failure.
        logging.getLogger(__name__).warning(
            "Text normalization failed; using the raw text", exc_info=True
        )

    wav_chunks = []
    for sent in sent_tokenize(text):
        if not sent.strip():
            continue
        wav = model.inference(
            text=sent,
            language="vi",
            gpt_cond_latent=gpt_latent,
            speaker_embedding=speaker_embed,
            temperature=0.5,
            top_k=20,
            top_p=0.85,
            repetition_penalty=5.0,
        )
        wav_chunks.append(torch.tensor(wav["wav"]))

    # torch.cat raises on an empty list; report the problem clearly instead.
    if not wav_chunks:
        raise gr.Error("Vui lòng nhập văn bản cần chuyển thành giọng nói.")

    final_wav = torch.cat(wav_chunks, dim=0)
    os.makedirs("output", exist_ok=True)
    filename = f"./output/{get_file_name(text)}.mp3"
    # 24000 is the sample rate passed to the writer -- presumably the model's
    # output rate; confirm against the model config.
    sf.write(filename, final_wav.numpy(), 24000, format="MP3")
    return filename
84
+
85
+ # Giao diện Gradio
86
# Label shown in the UI -> base name of the reference clip for that voice.
voices = {
    "Bống Xinh": "bongxinh",
    "Nam Calm": "nam-calm",
    "Nam Cham": "nam-cham",
    "Nam Truyền cảm": "nam-truyen-cam",
    "Nữ Lưu Loát": "nu-luu-loat",
    "Nữ Nhẹ Nhàng": "nu-nhe-nhang",
    # Add any further voices you have here.
}
95
+
96
+
97
# Build the Gradio UI: a text box and a voice selector, a button that triggers
# synthesis, and an audio player for the result.
with gr.Blocks() as demo:
    gr.Markdown("## 🇻🇳 Text to Speech tiếng Việt (XTTS)")
    with gr.Row():
        text_input = gr.Textbox(label="Nhập văn bản", lines=5, placeholder="Nhập văn bản tiếng Việt...")
        voice_choice = gr.Radio(
            choices=list(voices),
            label="Chọn giọng đọc",
            # Default to the first configured voice. The default is derived
            # from the mapping because a hand-typed label ("Bông Xinh") did
            # not match the actual key ("Bống Xinh") and so was not a valid
            # choice.
            value=next(iter(voices), None),
        )
    btn = gr.Button("🎙️ Chuyển thành giọng nói")
    audio_output = gr.Audio(label="🔊 Kết quả")

    def process(text, voice_label):
        """Map the selected UI label to its voice id and synthesise *text*.

        Returns the path of the generated audio file.
        """
        return synthesize(text, voices[voice_label])

    btn.click(fn=process, inputs=[text_input, voice_choice], outputs=audio_output)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchaudio
3
+ gradio==4.35
4
+ soundfile==0.13.1
5
+ vinorm==2.0.7
6
+ cutlet==0.5.0
7
+ unidic==1.1.0
8
+ underthesea
9
+ TTS @ git+https://github.com/thinhlpg/TTS.git@add-vietnamese-xtts