Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ import gradio as gr
|
|
| 6 |
import soundfile as sf
|
| 7 |
import tempfile
|
| 8 |
import torch
|
|
|
|
| 9 |
from vieneu_tts import VieNeuTTS
|
| 10 |
import time
|
| 11 |
|
|
@@ -28,7 +29,7 @@ except Exception as e:
|
|
| 28 |
return np.random.uniform(-0.1, 0.1, 24000*2)
|
| 29 |
tts = MockTTS()
|
| 30 |
|
| 31 |
-
# --- 2. DATA ---
|
| 32 |
VOICE_SAMPLES = {
|
| 33 |
"Tuyên (nam miền Bắc)": {"audio": "./sample/Tuyên (nam miền Bắc).wav", "text": "./sample/Tuyên (nam miền Bắc).txt"},
|
| 34 |
"Thiện Tâm": {"audio": "./sample/thientam.mp3", "text": "./sample/thientam.txt"},
|
|
@@ -54,13 +55,22 @@ def load_reference_info(voice_choice):
|
|
| 54 |
return None, ""
|
| 55 |
|
| 56 |
@spaces.GPU(duration=120)
|
| 57 |
-
def synthesize_speech(text, voice_choice, custom_audio, custom_text, mode_tab):
|
| 58 |
try:
|
| 59 |
if not text or text.strip() == "":
|
| 60 |
return None, "⚠️ Vui lòng nhập nội dung!"
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
if mode_tab == "custom_mode":
|
| 65 |
if custom_audio is None or not custom_text:
|
| 66 |
return None, "⚠️ Thiếu Audio mẫu hoặc Text mẫu."
|
|
@@ -71,126 +81,133 @@ def synthesize_speech(text, voice_choice, custom_audio, custom_text, mode_tab):
|
|
| 71 |
with open(VOICE_SAMPLES[voice_choice]["text"], "r", encoding="utf-8") as f:
|
| 72 |
ref_text_raw = f.read()
|
| 73 |
|
|
|
|
| 74 |
start_time = time.time()
|
| 75 |
ref_codes = tts.encode_reference(ref_audio_path)
|
| 76 |
-
wav = tts.infer(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
process_time = time.time() - start_time
|
| 78 |
|
| 79 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
|
| 80 |
sf.write(tmp_file.name, wav, 24000)
|
| 81 |
output_path = tmp_file.name
|
| 82 |
|
| 83 |
-
return output_path, f"⚡ Xử lý: {process_time:.2f}s"
|
| 84 |
except Exception as e:
|
| 85 |
return None, f"❌ Lỗi: {str(e)}"
|
| 86 |
|
| 87 |
-
# --- 4. THEME & CSS
|
| 88 |
theme = gr.themes.Default(
|
| 89 |
primary_hue="indigo",
|
| 90 |
-
secondary_hue="
|
| 91 |
neutral_hue="slate",
|
| 92 |
font=[gr.themes.GoogleFont('Inter'), 'sans-serif'],
|
| 93 |
).set(
|
| 94 |
-
body_background_fill="#
|
| 95 |
block_background_fill="#0f172a",
|
| 96 |
block_border_width="1px",
|
| 97 |
-
block_label_text_color="#94a3b8",
|
| 98 |
-
button_primary_background_fill="linear-gradient(135deg, #6366f1 0%, #a855f7 100%)",
|
| 99 |
-
button_primary_background_fill_hover="linear-gradient(135deg, #4f46e5 0%, #9333ea 100%)",
|
| 100 |
-
button_primary_text_color="white",
|
| 101 |
input_background_fill="#1e293b",
|
| 102 |
input_border_color="#334155",
|
|
|
|
| 103 |
)
|
| 104 |
|
| 105 |
css = """
|
| 106 |
-
.
|
| 107 |
-
.main-wrap { max-width: 1100px !important; margin: auto !important; padding: 40px 20px !important; }
|
| 108 |
-
.header-area { text-align: center; margin-bottom: 40px; }
|
| 109 |
-
.header-area h1 {
|
| 110 |
-
background: linear-gradient(90deg, #60a5fa, #c084fc);
|
| 111 |
-
-webkit-background-clip: text; -webkit-text-fill-color: transparent;
|
| 112 |
-
font-size: 3rem !important; font-weight: 900 !important; letter-spacing: -1px; margin: 0;
|
| 113 |
-
}
|
| 114 |
-
.header-area p { color: #64748b; font-size: 1.1rem; margin-top: 10px; }
|
| 115 |
.st-card {
|
| 116 |
-
border-radius:
|
| 117 |
-
border: 1px solid rgba(255,255,255,0.
|
| 118 |
-
box-shadow: 0 20px
|
| 119 |
padding: 15px;
|
| 120 |
}
|
| 121 |
.result-card {
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
}
|
| 126 |
-
|
| 127 |
-
|
| 128 |
"""
|
| 129 |
|
| 130 |
# --- 5. UI CONSTRUCTION ---
|
| 131 |
-
with gr.Blocks(title="
|
| 132 |
|
| 133 |
with gr.Column(elem_classes="main-wrap"):
|
| 134 |
-
gr.HTML("""
|
| 135 |
-
<div class="header-area">
|
| 136 |
-
<h1>VieNeu Studio</h1>
|
| 137 |
-
<p>Nền tảng chuyển đổi giọng nói AI chuyên nghiệp</p>
|
| 138 |
-
</div>
|
| 139 |
-
""")
|
| 140 |
-
|
| 141 |
with gr.Row(equal_height=True):
|
| 142 |
-
#
|
| 143 |
with gr.Column(scale=1):
|
| 144 |
with gr.Group(elem_classes="st-card"):
|
| 145 |
text_input = gr.Textbox(
|
| 146 |
-
label="VĂN BẢN
|
| 147 |
-
placeholder="
|
| 148 |
-
lines=
|
| 149 |
show_label=True,
|
| 150 |
)
|
| 151 |
-
char_count = gr.HTML("<div style='text-align: right; color: #
|
| 152 |
|
| 153 |
-
#
|
| 154 |
with gr.Column(scale=1):
|
| 155 |
with gr.Tabs() as tabs:
|
| 156 |
-
with gr.TabItem("👤
|
| 157 |
voice_select = gr.Dropdown(
|
| 158 |
choices=list(VOICE_SAMPLES.keys()),
|
| 159 |
value="Tuyên (nam miền Bắc)",
|
| 160 |
-
label="
|
| 161 |
)
|
| 162 |
with gr.Accordion("Nghe thử giọng mẫu", open=False):
|
| 163 |
ref_audio_preview = gr.Audio(interactive=False, show_label=False)
|
| 164 |
ref_text_preview = gr.Markdown("...")
|
| 165 |
|
| 166 |
-
with gr.TabItem("🎙️
|
| 167 |
-
gr.
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
current_mode = gr.State(value="preset_mode")
|
| 172 |
|
| 173 |
gr.Markdown("<br>")
|
| 174 |
-
btn_generate = gr.Button("TỔNG HỢP
|
| 175 |
|
| 176 |
-
# Khu vực kết quả đã sửa lỗi 'style'
|
| 177 |
with gr.Group(elem_classes="st-card result-card"):
|
| 178 |
-
audio_output = gr.Audio(label="KẾT QUẢ
|
| 179 |
-
status_output = gr.Markdown("<p style='text-align: center; color: #
|
| 180 |
|
| 181 |
-
gr.HTML("<div class='footer'>
|
| 182 |
|
| 183 |
# --- LOGIC ---
|
| 184 |
def update_count(text):
|
| 185 |
l = len(text)
|
| 186 |
-
color = "#
|
| 187 |
-
return f"<div style='text-align: right; color: {color}; font-size: 0.85rem; font-weight:
|
| 188 |
|
| 189 |
text_input.change(update_count, text_input, char_count)
|
| 190 |
|
| 191 |
def update_ref_preview(voice):
|
| 192 |
audio, text = load_reference_info(voice)
|
| 193 |
-
return audio, f"
|
| 194 |
|
| 195 |
voice_select.change(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
|
| 196 |
demo.load(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
|
|
@@ -200,15 +217,9 @@ with gr.Blocks(title="VieNeu-TTS Studio") as demo:
|
|
| 200 |
|
| 201 |
btn_generate.click(
|
| 202 |
fn=synthesize_speech,
|
| 203 |
-
inputs=[text_input, voice_select, custom_audio, custom_text, current_mode],
|
| 204 |
outputs=[audio_output, status_output]
|
| 205 |
)
|
| 206 |
|
| 207 |
if __name__ == "__main__":
|
| 208 |
-
|
| 209 |
-
demo.queue().launch(
|
| 210 |
-
theme=theme,
|
| 211 |
-
css=css,
|
| 212 |
-
server_name="0.0.0.0",
|
| 213 |
-
server_port=7860
|
| 214 |
-
)
|
|
|
|
| 6 |
import soundfile as sf
|
| 7 |
import tempfile
|
| 8 |
import torch
|
| 9 |
+
import librosa # Thêm thư viện xử lý âm thanh
|
| 10 |
from vieneu_tts import VieNeuTTS
|
| 11 |
import time
|
| 12 |
|
|
|
|
| 29 |
return np.random.uniform(-0.1, 0.1, 24000*2)
|
| 30 |
tts = MockTTS()
|
| 31 |
|
| 32 |
+
# --- 2. DATA (Giữ nguyên danh sách giọng mẫu) ---
|
| 33 |
VOICE_SAMPLES = {
|
| 34 |
"Tuyên (nam miền Bắc)": {"audio": "./sample/Tuyên (nam miền Bắc).wav", "text": "./sample/Tuyên (nam miền Bắc).txt"},
|
| 35 |
"Thiện Tâm": {"audio": "./sample/thientam.mp3", "text": "./sample/thientam.txt"},
|
|
|
|
| 55 |
return None, ""
|
| 56 |
|
| 57 |
@spaces.GPU(duration=120)
|
| 58 |
+
def synthesize_speech(text, voice_choice, custom_audio, custom_text, mode_tab, pause_level, speed_value):
|
| 59 |
try:
|
| 60 |
if not text or text.strip() == "":
|
| 61 |
return None, "⚠️ Vui lòng nhập nội dung!"
|
| 62 |
+
|
| 63 |
+
# 3.1. Xử lý độ ngắt nghỉ (Pause level)
|
| 64 |
+
processed_text = text
|
| 65 |
+
if pause_level == "Trung bình":
|
| 66 |
+
processed_text = processed_text.replace(",", ", , ").replace(".", ". . ")
|
| 67 |
+
elif pause_level == "Dài":
|
| 68 |
+
processed_text = processed_text.replace(",", ", , , ").replace(".", ". . . . ")
|
| 69 |
+
|
| 70 |
+
if len(processed_text) > 400:
|
| 71 |
+
processed_text = processed_text[:400]
|
| 72 |
+
|
| 73 |
+
# 3.2. Lấy dữ liệu Reference
|
| 74 |
if mode_tab == "custom_mode":
|
| 75 |
if custom_audio is None or not custom_text:
|
| 76 |
return None, "⚠️ Thiếu Audio mẫu hoặc Text mẫu."
|
|
|
|
| 81 |
with open(VOICE_SAMPLES[voice_choice]["text"], "r", encoding="utf-8") as f:
|
| 82 |
ref_text_raw = f.read()
|
| 83 |
|
| 84 |
+
# 3.3. Thực hiện Inference
|
| 85 |
start_time = time.time()
|
| 86 |
ref_codes = tts.encode_reference(ref_audio_path)
|
| 87 |
+
wav = tts.infer(processed_text, ref_codes, ref_text_raw)
|
| 88 |
+
|
| 89 |
+
# 3.4. Điều chỉnh Tốc độ (Speed) bằng librosa
|
| 90 |
+
if speed_value != 1.0:
|
| 91 |
+
# Time stretch giữ nguyên pitch
|
| 92 |
+
wav = librosa.effects.time_stretch(wav, rate=float(speed_value))
|
| 93 |
+
|
| 94 |
process_time = time.time() - start_time
|
| 95 |
|
| 96 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
|
| 97 |
sf.write(tmp_file.name, wav, 24000)
|
| 98 |
output_path = tmp_file.name
|
| 99 |
|
| 100 |
+
return output_path, f"⚡ Xử lý: {process_time:.2f}s | Tốc độ: {speed_value}x"
|
| 101 |
except Exception as e:
|
| 102 |
return None, f"❌ Lỗi: {str(e)}"
|
| 103 |
|
| 104 |
+
# --- 4. THEME & CSS ---
|
| 105 |
theme = gr.themes.Default(
|
| 106 |
primary_hue="indigo",
|
| 107 |
+
secondary_hue="blue",
|
| 108 |
neutral_hue="slate",
|
| 109 |
font=[gr.themes.GoogleFont('Inter'), 'sans-serif'],
|
| 110 |
).set(
|
| 111 |
+
body_background_fill="#020617",
|
| 112 |
block_background_fill="#0f172a",
|
| 113 |
block_border_width="1px",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
input_background_fill="#1e293b",
|
| 115 |
input_border_color="#334155",
|
| 116 |
+
button_primary_background_fill="linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%)",
|
| 117 |
)
|
| 118 |
|
| 119 |
css = """
|
| 120 |
+
.main-wrap { max-width: 1200px !important; margin: auto !important; padding: 20px !important; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
.st-card {
|
| 122 |
+
border-radius: 16px !important;
|
| 123 |
+
border: 1px solid rgba(255,255,255,0.1) !important;
|
| 124 |
+
box-shadow: 0 4px 20px rgba(0,0,0,0.5) !important;
|
| 125 |
padding: 15px;
|
| 126 |
}
|
| 127 |
.result-card {
|
| 128 |
+
background: linear-gradient(180deg, rgba(15, 23, 42, 0.8) 0%, rgba(30, 41, 59, 0.8) 100%) !important;
|
| 129 |
+
border: 1px solid rgba(99, 102, 241, 0.2) !important;
|
| 130 |
+
margin-top: 15px;
|
| 131 |
}
|
| 132 |
+
audio { filter: invert(90%) hue-rotate(180deg) brightness(1.5); width: 100%; border-radius: 8px; }
|
| 133 |
+
.footer { text-align: center; margin-top: 40px; color: #475569; font-size: 0.8rem; font-weight: 500; }
|
| 134 |
"""
|
| 135 |
|
| 136 |
# --- 5. UI CONSTRUCTION ---
|
| 137 |
+
with gr.Blocks(title="AI Voice Studio") as demo:
|
| 138 |
|
| 139 |
with gr.Column(elem_classes="main-wrap"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
with gr.Row(equal_height=True):
|
| 141 |
+
# CỘT TRÁI
|
| 142 |
with gr.Column(scale=1):
|
| 143 |
with gr.Group(elem_classes="st-card"):
|
| 144 |
text_input = gr.Textbox(
|
| 145 |
+
label="VĂN BẢN CẦN CHUYỂN ĐỔI",
|
| 146 |
+
placeholder="Nhập nội dung vào đây...",
|
| 147 |
+
lines=20, # Tăng thêm để cân bằng với các nút mới
|
| 148 |
show_label=True,
|
| 149 |
)
|
| 150 |
+
char_count = gr.HTML("<div style='text-align: right; color: #6366f1; font-size: 0.85rem; font-weight: bold; padding: 5px;'>0 / 250</div>")
|
| 151 |
|
| 152 |
+
# CỘT PHẢI
|
| 153 |
with gr.Column(scale=1):
|
| 154 |
with gr.Tabs() as tabs:
|
| 155 |
+
with gr.TabItem("👤 Nghệ sĩ đọc", id="preset_mode"):
|
| 156 |
voice_select = gr.Dropdown(
|
| 157 |
choices=list(VOICE_SAMPLES.keys()),
|
| 158 |
value="Tuyên (nam miền Bắc)",
|
| 159 |
+
label="Lựa chọn giọng đọc mẫu",
|
| 160 |
)
|
| 161 |
with gr.Accordion("Nghe thử giọng mẫu", open=False):
|
| 162 |
ref_audio_preview = gr.Audio(interactive=False, show_label=False)
|
| 163 |
ref_text_preview = gr.Markdown("...")
|
| 164 |
|
| 165 |
+
with gr.TabItem("🎙️ Nhân bản (Clone)", id="custom_mode"):
|
| 166 |
+
custom_audio = gr.Audio(label="Audio gốc", type="filepath")
|
| 167 |
+
custom_text = gr.Textbox(
|
| 168 |
+
label="NỘI DUNG AUDIO MẪU",
|
| 169 |
+
placeholder="Nhập lời thoại của audio mẫu...",
|
| 170 |
+
lines=4,
|
| 171 |
+
show_label=True
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
# --- KHU VỰC ĐIỀU CHỈNH ÂM THANH ---
|
| 175 |
+
with gr.Row():
|
| 176 |
+
pause_level = gr.Radio(
|
| 177 |
+
choices=["Mặc định", "Trung bình", "Dài"],
|
| 178 |
+
value="Mặc định",
|
| 179 |
+
label="Độ ngắt nghỉ",
|
| 180 |
+
scale=1
|
| 181 |
+
)
|
| 182 |
+
speed_select = gr.Dropdown(
|
| 183 |
+
choices=[0.8, 0.9, 1.0, 1.1, 1.2, 1.5],
|
| 184 |
+
value=1.0,
|
| 185 |
+
label="Tốc độ đọc",
|
| 186 |
+
scale=1
|
| 187 |
+
)
|
| 188 |
|
| 189 |
current_mode = gr.State(value="preset_mode")
|
| 190 |
|
| 191 |
gr.Markdown("<br>")
|
| 192 |
+
btn_generate = gr.Button("BẮT ĐẦU TỔNG HỢP", variant="primary", size="lg")
|
| 193 |
|
|
|
|
| 194 |
with gr.Group(elem_classes="st-card result-card"):
|
| 195 |
+
audio_output = gr.Audio(label="AUDIO KẾT QUẢ", interactive=False, autoplay=True)
|
| 196 |
+
status_output = gr.Markdown("<p style='text-align: center; color: #818cf8; font-weight: 500;'>✨ Sẵn sàng thực hiện</p>")
|
| 197 |
|
| 198 |
+
gr.HTML("<div class='footer'>ENGINE BY VIENEU-TTS • PROFESSIONAL AI SOLUTIONS 2025</div>")
|
| 199 |
|
| 200 |
# --- LOGIC ---
|
| 201 |
def update_count(text):
    """Render the character counter HTML for the main text input.

    Args:
        text: Current content of the text input. ``None`` is counted as an
            empty string.

    Returns:
        An HTML ``<div>`` string showing ``"<count> / 250"``. The text colour
        is ``#6366f1`` while the count is at most 250 and ``#f43f5e`` once it
        exceeds 250.
    """
    limit = 250
    # Guard against a None value so len() cannot raise TypeError.
    count = len(text or "")
    color = "#6366f1" if count <= limit else "#f43f5e"
    return f"<div style='text-align: right; color: {color}; font-size: 0.85rem; font-weight: bold; padding: 5px;'>{count} / {limit}</div>"
|
| 205 |
|
| 206 |
text_input.change(update_count, text_input, char_count)
|
| 207 |
|
| 208 |
def update_ref_preview(voice):
    """Build the preview outputs for the selected sample voice.

    Args:
        voice: Voice name passed through to ``load_reference_info``.

    Returns:
        A 2-tuple ``(audio, markdown)``: the audio value returned by
        ``load_reference_info`` unchanged, and its text wrapped in a
        Markdown caption.
    """
    sample_audio, sample_text = load_reference_info(voice)
    caption = f"**Nội dung mẫu:** *\"{sample_text}\"*"
    return sample_audio, caption
|
| 211 |
|
| 212 |
voice_select.change(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
|
| 213 |
demo.load(update_ref_preview, voice_select, [ref_audio_preview, ref_text_preview])
|
|
|
|
| 217 |
|
| 218 |
btn_generate.click(
|
| 219 |
fn=synthesize_speech,
|
| 220 |
+
inputs=[text_input, voice_select, custom_audio, custom_text, current_mode, pause_level, speed_select],
|
| 221 |
outputs=[audio_output, status_output]
|
| 222 |
)
|
| 223 |
|
| 224 |
if __name__ == "__main__":
|
| 225 |
+
demo.queue().launch(theme=theme, css=css, server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|