Spaces:

jing-ju
/

AI-Translates

Runtime error

App Files Community

jing-ju committed on Sep 13

Commit

fb0574e

verified ·

1 Parent(s): b3b7ed6

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -29

app.py CHANGED Viewed

@@ -2,28 +2,31 @@ import os
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-# Bạn có thể đổi MODEL_NAME tại "Variables" của Space thay vì sửa code
 MODEL_NAME = os.getenv("MODEL_NAME", "tencent/Hunyuan-MT-Chimera-7B-fp8")
-# Tham số sinh khuyến nghị theo model card
 GEN_KW = dict(
-    max_new_tokens=256,  # giữ thấp để bản thử CPU tránh quá chậm/thiếu RAM
-    top_k=20,
-    top_p=0.6,
-    repetition_penalty=1.05,
-    temperature=0.7,
-    do_sample=True
 )
-# Load model/tokenizer (trust_remote_code cho phép dùng chat template do model cung cấp)
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
-# Trên CPU miễn phí, không có GPU → bỏ device_map="auto".
-# Nếu bạn đổi Space sang GPU về sau, có thể thêm device_map="auto".
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    torch_dtype="auto",
-    trust_remote_code=True
 )
 def _chat_translate(prompt: str) -> str:
@@ -37,30 +40,23 @@ def _chat_translate(prompt: str) -> str:
 def zh_to_vi(text: str) -> str:
     return _chat_translate(
-        f"Translate the following segment into Vietnamese, without additional explanation.\n\n{text}"
     )
 def vi_to_zh(text: str) -> str:
     return _chat_translate(
-        f"Translate the following segment into Chinese, without additional explanation.\n\n{text}"
     )
-# ===== Gradio UI (2 tab) + API endpoints =====
 with gr.Blocks() as demo:
-    gr.Markdown("## Hunyuan-MT Chimera-7B-fp8 (Trial on CPU) — ZH ⇄ VI\n⚠️ Bản thử nghiệm CPU: tốc độ chậm / có thể giới hạn độ dài.")
     with gr.Tab("ZH → VI"):
-        inp_zh = gr.Textbox(label="Nhập tiếng Trung", lines=6, placeholder="Nhập đoạn tiếng Trung...")
-        out_vi = gr.Textbox(label="Bản dịch tiếng Việt", lines=6)
-        btn_zh = gr.Button("Dịch ZH→VI")
-        # api_name để client có thể gọi như một API
-        btn_zh.click(fn=zh_to_vi, inputs=inp_zh, outputs=out_vi, api_name="zh_vi")
     with gr.Tab("VI → ZH"):
-        inp_vi = gr.Textbox(label="Nhập tiếng Việt", lines=6, placeholder="Nhập đoạn tiếng Việt...")
-        out_zh = gr.Textbox(label="Bản dịch tiếng Trung", lines=6)
-        btn_vi = gr.Button("Dịch VI→ZH")
-        btn_vi.click(fn=vi_to_zh, inputs=inp_vi, outputs=out_zh, api_name="vi_zh")
-# Giới hạn hàng đợi để demo ít user (tránh quá tải)
 demo.queue(concurrency_count=1, max_size=2).launch()

 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
+# 👇 import config lượng tử hoá cho compressed-tensors
+from transformers import CompressedTensorsQuantizationConfig
 MODEL_NAME = os.getenv("MODEL_NAME", "tencent/Hunyuan-MT-Chimera-7B-fp8")
+# Tham số sinh gợi ý
 GEN_KW = dict(
+    max_new_tokens=256,  # hạ thấp cho CPU free; có thể tăng nếu ổn
+    top_k=20, top_p=0.6, repetition_penalty=1.05, temperature=0.7, do_sample=True
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+# ✅ Truyền quantization_config và ép ignore=[] để tránh NoneType
+ctq = CompressedTensorsQuantizationConfig(
+    quantization_method="fp8",   # để transformers chọn đúng quantizer
+    ignore=[]                    # <-- chìa khoá sửa lỗi
+)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
+    trust_remote_code=True,
+    quantization_config=ctq,     # <-- dùng ctq thay vì mặc định trong config
+    # Không đặt device_map="auto" nếu bạn đang ở CPU Space
+    # dtype có thể để "auto"; cảnh báo "torch_dtype deprecated" có thể bỏ qua
 )
 def _chat_translate(prompt: str) -> str:
 def zh_to_vi(text: str) -> str:
     return _chat_translate(
+        "Translate the following segment into Vietnamese, without additional explanation.\n\n" + text
     )
 def vi_to_zh(text: str) -> str:
     return _chat_translate(
+        "Translate the following segment into Chinese, without additional explanation.\n\n" + text
     )
 with gr.Blocks() as demo:
+    gr.Markdown("## Hunyuan-MT Chimera-7B-fp8 (Trial on CPU) — ZH ⇄ VI")
     with gr.Tab("ZH → VI"):
+        inp_zh = gr.Textbox(label="Tiếng Trung", lines=6)
+        out_vi = gr.Textbox(label="Tiếng Việt", lines=6)
+        gr.Button("Dịch ZH→VI").click(zh_to_vi, inputs=inp_zh, outputs=out_vi, api_name="zh_vi")
     with gr.Tab("VI → ZH"):
+        inp_vi = gr.Textbox(label="Tiếng Việt", lines=6)
+        out_zh = gr.Textbox(label="Tiếng Trung", lines=6)
+        gr.Button("Dịch VI→ZH").click(vi_to_zh, inputs=inp_vi, outputs=out_zh, api_name="vi_zh")
 demo.queue(concurrency_count=1, max_size=2).launch()