Spaces:

Havyle
/

VSL-Translation-Demo

Sleeping

App Files Files Community

Havyle committed on Dec 2, 2025

Commit

0fc5a71

verified ·

1 Parent(s): aa9a24c

Update app.py

Browse files

Files changed (1) hide show

app.py +179 -40

app.py CHANGED Viewed

@@ -2,19 +2,25 @@ import torch
 import gradio as gr
 from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
-# --- PHẦN 1: CẤU HÌNH & LOAD MODEL ---
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-# 1. Load PhoWhisper (Nhận dạng giọng nói)
 print("Đang tải model PhoWhisper...")
-asr_pipeline = pipeline(
-    "automatic-speech-recognition",
-    model="vinai/PhoWhisper-small",
-    device=0 if torch.cuda.is_available() else -1
-)
-# 2. Load Model Dịch (ViT5)
-# LƯU Ý QUAN TRỌNG:
 TRANSLATION_MODEL_PATH = "."
 print(f"Đang tải model dịch...")
@@ -26,15 +32,17 @@ except Exception as e:
     print(f"Lỗi load model dịch: {e}")
     trans_model = None
-# --- PHẦN 2: CÁC HÀM XỬ LÝ (LOGIC) ---
 def speech_to_text(audio_path):
     if audio_path is None: return ""
     try:
         output = asr_pipeline(audio_path)
         return output['text']
     except Exception as e:
-        return f"Lỗi: {str(e)}"
 def text_to_gloss(vietnamese_text):
     if not vietnamese_text: return ""
@@ -61,47 +69,178 @@ def full_pipeline(audio, text_input, mode):
     elif mode == "Văn bản (Nhập tay)" and text_input:
         vietnamese_output = text_input
     else:
-        return "Vui lòng nhập dữ liệu.", ""
     gloss_output = text_to_gloss(vietnamese_output)
     return vietnamese_output, gloss_output
-# --- PHẦN 3: GIAO DIỆN (UI) - ĐOẠN CODE CỦA BẠN ---
 custom_css = """
-.container {max-width: 1200px; margin: auto; padding-top: 20px}
-.header-text {text-align: center; font-family: 'Arial', sans-serif;}
-.uni-name {font-size: 24px; font-weight: bold; color: #003366; margin-bottom: 5px;}
-.faculty-name {font-size: 18px; font-weight: normal; color: #cc0000; margin-bottom: 20px;}
-.project-title {font-size: 28px; font-weight: bold; color: #2c3e50; margin-bottom: 10px; border-bottom: 2px solid #eee; padding-bottom: 10px;}
-.note-text {font-size: 14px; font-style: italic; color: #7f8c8d; margin-top: 20px; border-top: 1px solid #eee; padding-top: 10px;}
-.output-box {border: 1px solid #e0e0e0; background-color: #f9f9f9; border-radius: 8px;}
 """
-with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
-    # ... (Dán phần giao diện của bạn vào đây, y hệt đoạn bạn gửi) ...
-    # Để cho gọn, mình viết tắt, bạn giữ nguyên code UI của bạn nhé
-    with gr.Column(elem_classes="header-text"):
-        gr.Markdown("""<div class="project-title">HỆ THỐNG DỊCH VSL DEMO</div>""")
     with gr.Row():
-        with gr.Column():
             with gr.Tabs():
-                with gr.TabItem("Giọng nói"):
-                    input_audio = gr.Audio(sources=["microphone", "upload"], type="filepath")
                     mode_audio = gr.State(value="Giọng nói (Microphone/File)")
-                    btn_audio = gr.Button("Xử lý Giọng nói", variant="primary")
-                with gr.TabItem("Văn bản"):
-                    input_text = gr.Textbox(label="Nhập text")
                     mode_text = gr.State(value="Văn bản (Nhập tay)")
-                    btn_text = gr.Button("Dịch Văn bản", variant="primary")
-        with gr.Column():
-            output_vi = gr.Textbox(label="Tiếng Việt")
-            output_gloss = gr.Textbox(label="VSL Gloss")
-    btn_audio.click(fn=full_pipeline, inputs=[input_audio, input_text, mode_audio], outputs=[output_vi, output_gloss])
-    btn_text.click(fn=full_pipeline, inputs=[input_audio, input_text, mode_text], outputs=[output_vi, output_gloss])
 # Chạy app
 if __name__ == "__main__":

 import gradio as gr
 from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+# ==========================================
+# 1. CẤU HÌNH & LOAD MODEL (BACKEND)
+# ==========================================
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# --- Load PhoWhisper ---
 print("Đang tải model PhoWhisper...")
+try:
+    asr_pipeline = pipeline(
+        "automatic-speech-recognition",
+        model="vinai/PhoWhisper-small",
+        device=0 if torch.cuda.is_available() else -1
+    )
+except Exception as e:
+    print(f"Lỗi load PhoWhisper: {e}")
+    asr_pipeline = None
+# --- Load ViT5 Translation ---
+# Lưu ý: Đảm bảo bạn đã upload các file model ra ngoài cùng (root) như hướng dẫn trước
 TRANSLATION_MODEL_PATH = "."
 print(f"Đang tải model dịch...")
     print(f"Lỗi load model dịch: {e}")
     trans_model = None
+# ==========================================
+# 2. HÀM XỬ LÝ LOGIC
+# ==========================================
 def speech_to_text(audio_path):
     if audio_path is None: return ""
+    if asr_pipeline is None: return "Lỗi: Chưa load được PhoWhisper."
     try:
         output = asr_pipeline(audio_path)
         return output['text']
     except Exception as e:
+        return f"Lỗi nhận dạng: {str(e)}"
 def text_to_gloss(vietnamese_text):
     if not vietnamese_text: return ""
     elif mode == "Văn bản (Nhập tay)" and text_input:
         vietnamese_output = text_input
     else:
+        # Trường hợp không có input
+        if mode == "Giọng nói (Microphone/File)":
+             return "⚠️ Vui lòng thu âm hoặc tải file.", ""
+        else:
+             return "⚠️ Vui lòng nhập văn bản.", ""
     gloss_output = text_to_gloss(vietnamese_output)
     return vietnamese_output, gloss_output
+# ==========================================
+# 3. GIAO DIỆN ĐẸP (UI/UX)
+# ==========================================
+# CSS tùy chỉnh để làm đẹp
 custom_css = """
+/* Font chữ toàn bộ app */
+@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap');
+body { font-family: 'Roboto', sans-serif; }
+/* Header Gradient */
+.header-container {
+    background: linear-gradient(90deg, #003366 0%, #00509d 100%);
+    padding: 25px;
+    border-radius: 12px;
+    color: white;
+    text-align: center;
+    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
+    margin-bottom: 20px;
+}
+.uni-name { font-size: 16px; text-transform: uppercase; letter-spacing: 1px; opacity: 0.9; }
+.project-name { font-size: 28px; font-weight: 700; margin: 10px 0; }
+.author-name { font-size: 14px; font-style: italic; opacity: 0.8; }
+/* Nút bấm xịn hơn */
+button.primary-btn {
+    background-color: #00509d !important;
+    color: white !important;
+    font-weight: bold;
+    border-radius: 8px;
+    transition: 0.3s;
+}
+button.primary-btn:hover {
+    background-color: #003366 !important;
+    transform: scale(1.02);
+}
+/* Khung kết quả nổi bật */
+.result-box textarea {
+    font-size: 18px !important;
+    font-weight: 500;
+    color: #2c3e50;
+}
+.gloss-box textarea {
+    font-size: 20px !important;
+    font-weight: bold;
+    color: #d32f2f; /* Màu đỏ cho Gloss */
+}
+/* Footer */
+.footer { text-align: center; color: gray; font-size: 12px; margin-top: 30px; }
 """
+# Chọn theme Soft để các góc bo tròn mềm mại
+theme = gr.themes.Soft(
+    primary_hue="blue",
+    secondary_hue="slate",
+).set(
+    button_primary_background_fill="#00509d",
+    button_primary_background_fill_hover="#003366",
+)
+with gr.Blocks(css=custom_css, theme=theme, title="VSL Translator") as demo:
+    # --- 1. HEADER ---
+    gr.HTML(
+        """
+        <div class="header-container">
+            <div class="uni-name">Trường Đại học Kinh tế - Đại học Đà Nẵng</div>
+            <div class="project-name">HỆ THỐNG DỊCH TIẾNG VIỆT SANG NGÔN NGỮ KÝ HIỆU (VSL)</div>
+            <div class="author-name">Sinh viên thực hiện: Lê Thị Hà Vy | GVHD: Th.S Nguyễn Văn Chức</div>
+        </div>
+        """
+    )
+    # --- 2. MAIN CONTENT ---
     with gr.Row():
+        # CỘT TRÁI: INPUT (Đầu vào)
+        with gr.Column(scale=1, variant="panel"):
+            gr.Markdown("### 🎤 Nhập liệu")
             with gr.Tabs():
+                # Tab 1: Giọng nói
+                with gr.TabItem("🎙️ Giọng nói"):
+                    input_audio = gr.Audio(
+                        sources=["microphone", "upload"],
+                        type="filepath",
+                        label="Thu âm hoặc Tải file Audio"
+                    )
                     mode_audio = gr.State(value="Giọng nói (Microphone/File)")
+                    btn_audio = gr.Button("✨ Xử lý Giọng nói", variant="primary", elem_classes="primary-btn")
+                # Tab 2: Văn bản
+                with gr.TabItem("📝 Văn bản"):
+                    input_text = gr.Textbox(
+                        label="Nhập câu tiếng Việt",
+                        placeholder="Ví dụ: Tôi muốn uống cà phê sữa...",
+                        lines=4
+                    )
                     mode_text = gr.State(value="Văn bản (Nhập tay)")
+                    # Gợi ý câu mẫu
+                    gr.Examples(
+                        examples=[
+                            ["Tôi muốn uống cà phê sữa"],
+                            ["Cho tôi một ly trà đào cam sả"],
+                            ["Cảm ơn bạn rất nhiều"]
+                        ],
+                        inputs=input_text,
+                        label="Câu mẫu (Click để chọn)"
+                    )
+                    btn_text = gr.Button("✨ Dịch Văn bản", variant="primary", elem_classes="primary-btn")
+        # CỘT PHẢI: OUTPUT (Kết quả)
+        with gr.Column(scale=1, variant="panel"):
+            gr.Markdown("### 🎯 Kết quả xử lý")
+            # Kết quả trung gian
+            gr.Label("Bước 1: Nhận dạng văn bản", show_label=False, color="blue")
+            output_vi = gr.Textbox(
+                label="Văn bản Tiếng Việt",
+                interactive=False,
+                show_copy_button=True,
+                elem_classes="result-box",
+                lines=2
+            )
+            gr.HTML("<br>") # Khoảng cách
+            # Kết quả cuối cùng
+            gr.Label("Bước 2: Cú pháp Ký hiệu (Gloss)", show_label=False, color="red")
+            output_gloss = gr.Textbox(
+                label="VSL Gloss Output",
+                interactive=False,
+                show_copy_button=True,
+                elem_classes="gloss-box",
+                lines=3
+            )
+    # --- 3. FOOTER ---
+    gr.HTML(
+        """
+        <div class="footer">
+        © 2025 VSL Translation Project. Khoa Thương mại Điện tử.<br>
+        Hệ thống sử dụng mô hình PhoWhisper (VinAI) và ViT5 (Fine-tuned).
+        </div>
+        """
+    )
+    # --- 4. XỬ LÝ SỰ KIỆN ---
+    btn_audio.click(
+        fn=full_pipeline,
+        inputs=[input_audio, input_text, mode_audio],
+        outputs=[output_vi, output_gloss]
+    )
+    btn_text.click(
+        fn=full_pipeline,
+        inputs=[input_audio, input_text, mode_text],
+        outputs=[output_vi, output_gloss]
+    )
 # Chạy app
 if __name__ == "__main__":