Spaces:

epchannel
/

EpXTTS

Sleeping

App Files Files Community

Epchannel committed on Apr 10, 2025

Commit

9b20cba

1 Parent(s): c1600ea

first commit

Browse files

Files changed (2) hide show

app.py +111 -0
requirements.txt +9 -0

app.py ADDED Viewed

	@@ -0,0 +1,111 @@

+import os
+import torch
+import gradio as gr
+from datetime import datetime
+from vinorm import TTSnorm
+from underthesea import sent_tokenize
+from unidecode import unidecode
+import soundfile as sf
+from TTS.tts.configs.xtts_config import XttsConfig
+from TTS.tts.models.xtts import Xtts
+from huggingface_hub import snapshot_download
+import os
+# Tải model nếu chưa có
+if not os.path.exists("model/model.pth"):
+    snapshot_download(repo_id="epchannel/EpXTTS", repo_type="model", local_dir="model")
+# Load XTTS model
+def load_model():
+    config = XttsConfig()
+    config.load_json("model/config.json")
+    model = Xtts.init_from_config(config)
+    model.load_checkpoint(config, checkpoint_path="model/model.pth", vocab_path="model/vocab.json")
+    if torch.cuda.is_available():
+        model.cuda()
+    return model
+# Chuẩn hóa văn bản tiếng Việt
+def normalize_vietnamese_text(text):
+    return (
+        TTSnorm(text, unknown=False, lower=False, rule=True)
+        .replace("..", ".").replace("!.", "!").replace("?.", "?")
+        .replace(" .", ".").replace(" ,", ",").replace('"', "")
+        .replace("'", "").replace("AI", "Ây Ai").replace("A.I", "Ây Ai")
+        .replace("anh/chị", "anh chị")
+    )
+# Tạo tên file
+def get_file_name(text, max_char=50):
+    filename = unidecode(text[:max_char].lower().replace(" ", "_"))
+    timestamp = datetime.now().strftime("%m%d%H%M%S")
+    return f"{timestamp}_{filename}"
+# Sinh tiếng nói
+def synthesize(text, voice_choice):
+    model = load_model()
+    ref_audio = f"model/samples/{voice_choice}.wav"
+    # Prepare speaker embedding
+    gpt_latent, speaker_embed = model.get_conditioning_latents(
+        audio_path=ref_audio,
+        gpt_cond_len=model.config.gpt_cond_len,
+        max_ref_length=model.config.max_ref_len,
+        sound_norm_refs=model.config.sound_norm_refs,
+    )
+    try:
+        text = normalize_vietnamese_text(text)
+    except:
+        pass
+    sentences = sent_tokenize(text)
+    wav_chunks = []
+    for sent in sentences:
+        if sent.strip() == "":
+            continue
+        wav = model.inference(
+            text=sent,
+            language="vi",
+            gpt_cond_latent=gpt_latent,
+            speaker_embedding=speaker_embed,
+            temperature=0.5,
+            top_k=20,
+            top_p=0.85,
+            repetition_penalty=5.0,
+        )
+        wav_chunks.append(torch.tensor(wav["wav"]))
+    final_wav = torch.cat(wav_chunks, dim=0).unsqueeze(0)
+    filename = f"./output/{get_file_name(text)}.mp3"
+    os.makedirs("output", exist_ok=True)
+    sf.write(filename, final_wav.squeeze(0).numpy(), 24000, format='MP3')
+    return filename
+# Giao diện Gradio
+voices = {
+    "Bống Xinh": "bongxinh",
+    "Nam Calm": "nam-calm",
+    "Nam Cham": "nam-cham",
+    "Nam Truyền cảm": "nam-truyen-cam",
+    "Nữ Lưu Loát": "nu-luu-loat",
+    "Nữ Nhẹ Nhàng": "nu-nhe-nhang",
+    # Thêm các giọng bạn có...
+}
+with gr.Blocks() as demo:
+    gr.Markdown("## 🇻🇳 Text to Speech tiếng Việt (XTTS)")
+    with gr.Row():
+        text_input = gr.Textbox(label="Nhập văn bản", lines=5, placeholder="Nhập văn bản tiếng Việt...")
+    voice_choice = gr.Radio(choices=list(voices.keys()), label="Chọn giọng đọc", value="Bông Xinh")
+    btn = gr.Button("🎙️ Chuyển thành giọng nói")
+    audio_output = gr.Audio(label="🔊 Kết quả")
+    def process(text, voice_label):
+        file = synthesize(text, voices[voice_label])
+        return file
+    btn.click(fn=process, inputs=[text_input, voice_choice], outputs=audio_output)
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+torch
+torchaudio
+gradio==4.35
+soundfile==0.13.1
+vinorm==2.0.7
+cutlet==0.5.0
+unidic==1.1.0
+underthesea
+TTS @ git+https://github.com/thinhlpg/TTS.git@add-vietnamese-xtts