Spaces:

Hzzzzx0
/

Automatic_machine_translation

Runtime error

App Files Files Community

Hzzzzx0 committed on Dec 14, 2025

Commit

39986f4

verified ·

1 Parent(s): dbf62b2

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +154 -0

app.py ADDED Viewed

	@@ -0,0 +1,154 @@

+"""Gradio demo for bilingual oral translation using Qwen3-0.6B + LoRA.
+This app provides a simple interface for Chinese ↔ English oral translation
+using a LoRA fine-tuned Qwen3-0.6B model.
+"""
+import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+def build_prompt(direction: str, text: str) -> str:
+    """Assemble the instruction-style prompt that is fed to the model.
+
+    ``"zh2en"`` selects the Chinese-to-English instruction; every other
+    ``direction`` value selects the English-to-Chinese instruction.
+    ``text`` is inserted verbatim under the ``### Input:`` header, and the
+    prompt ends with an open ``### Response:`` header.
+    """
+    instruction = (
+        "请把下面中文翻译成口语自然的英文。只输出译文。"
+        if direction == "zh2en"
+        else "请把下面英文翻译成口语自然的中文。只输出译文。"
+    )
+    sections = (
+        f"### Instruction:\n{instruction}",
+        f"### Input:\n{text}",
+        "### Response:\n",
+    )
+    # Sections are separated by exactly one blank line.
+    return "\n\n".join(sections)
+def load_model():
+    """Load the base model and tokenizer, then attach the LoRA adapter.
+
+    Returns:
+        A ``(model, tokenizer)`` tuple. The model is the ``PeftModel``
+        wrapping the base causal LM, with ``eval()`` already called.
+    """
+    base_id = "Qwen/Qwen3-0.6B"
+    # Hub repository that is expected to hold the uploaded LoRA adapter.
+    adapter_id = "Hzzzzx0/qwen3-0.6b-oral-lora"
+
+    print(f"Loading base model: {base_id}")
+    tok = AutoTokenizer.from_pretrained(base_id, trust_remote_code=True)
+    # NOTE(review): trust_remote_code=True permits executing code shipped in
+    # the model repository - confirm it is actually required here.
+    load_kwargs = dict(
+        torch_dtype=torch.bfloat16,
+        device_map="auto",
+        trust_remote_code=True,
+    )
+    base = AutoModelForCausalLM.from_pretrained(base_id, **load_kwargs)
+
+    print(f"Loading LoRA adapter: {adapter_id}")
+    peft_model = PeftModel.from_pretrained(base, adapter_id)
+    peft_model.eval()
+    return peft_model, tok
+# Load the model once at import time; translate() reads the resulting
+# module-level `model` and `tokenizer` on every call.
+print("Initializing model...")
+model, tokenizer = load_model()
+print("Model loaded successfully!")
+def translate(direction: str, text: str) -> str:
+    """Translate ``text`` with the LoRA fine-tuned model.
+
+    Args:
+        direction: ``"zh2en"`` for Chinese to English; any other value is
+            treated by ``build_prompt`` as English to Chinese.
+        text: Source text entered by the user.
+
+    Returns:
+        The generated translation with surrounding whitespace removed, or
+        a bilingual hint when ``text`` is missing or blank.
+    """
+    # Guard against None as well as empty / whitespace-only input.
+    if not text or not text.strip():
+        return "请输入要翻译的文本 / Please enter text to translate"
+
+    prompt = build_prompt(direction, text)
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    prompt_length = inputs["input_ids"].shape[-1]
+
+    # Fall back to EOS for padding when the tokenizer defines no pad token,
+    # so generate() is never handed pad_token_id=None.
+    pad_token_id = tokenizer.pad_token_id
+    if pad_token_id is None:
+        pad_token_id = tokenizer.eos_token_id
+
+    with torch.no_grad():
+        output = model.generate(
+            **inputs,
+            max_new_tokens=128,
+            do_sample=False,
+            repetition_penalty=1.2,
+            eos_token_id=tokenizer.eos_token_id,
+            pad_token_id=pad_token_id,
+        )
+
+    # For a causal LM the returned sequence starts with the prompt tokens.
+    # Decode only what was generated after them, so the result does not
+    # depend on finding the prompt markers again in the decoded text.
+    generated = output[0][prompt_length:]
+    result = tokenizer.decode(generated, skip_special_tokens=True)
+
+    # If the model keeps going and starts a new template section, keep only
+    # the first answer rather than whatever follows the last header.
+    for header in ("### Instruction:", "### Input:", "### Response:"):
+        result = result.split(header)[0]
+    return result.strip()
+# Clickable sample inputs for the UI: each entry is [direction, text],
+# three per translation direction.
+_ZH_SAMPLES = ("你好呀", "今天天气真不错", "我们去吃饭吧")
+_EN_SAMPLES = ("See you later", "How are you doing?", "Let's grab some coffee")
+examples = [
+    *(["zh2en", sentence] for sentence in _ZH_SAMPLES),
+    *(["en2zh", sentence] for sentence in _EN_SAMPLES),
+]
+# Build the Gradio Blocks UI: a Markdown header, an input column (direction
+# selector, text box, button), an output column, clickable examples, and a
+# Markdown footer with model information.
+with gr.Blocks(title="口语化机器翻译 | Oral Translation", theme=gr.themes.Soft()) as demo:
+    # Page header.
+    gr.Markdown(
+        """
+        # 🌐 口语化自动机器翻译
+        ## Oral Machine Translation (Chinese ↔ English)
+        基于 **Qwen3-0.6B + LoRA** 微调的中英双向口语翻译系统
+        Built with Qwen3-0.6B fine-tuned using LoRA for natural, conversational translation.
+        """
+    )
+    with gr.Row():
+        # Left column: user inputs.
+        with gr.Column():
+            # Each choice is (label shown to the user, value passed to translate()).
+            direction = gr.Radio(
+                choices=[
+                    ("中文 → 英文 (Chinese to English)", "zh2en"),
+                    ("英文 → 中文 (English to Chinese)", "en2zh"),
+                ],
+                value="zh2en",
+                label="翻译方向 | Translation Direction",
+            )
+            input_text = gr.Textbox(
+                lines=5,
+                placeholder="输入要翻译的文本...\nEnter text to translate...",
+                label="输入 | Input",
+            )
+            translate_btn = gr.Button("🔄 翻译 | Translate", variant="primary")
+        # Right column: translation result.
+        with gr.Column():
+            output_text = gr.Textbox(
+                lines=5,
+                label="翻译结果 | Translation",
+            )
+    # Sample inputs wired to the same input components; cache_examples=False
+    # is passed, so outputs are presumably not precomputed at startup.
+    gr.Examples(
+        examples=examples,
+        inputs=[direction, input_text],
+        outputs=output_text,
+        fn=translate,
+        cache_examples=False,
+    )
+    # Clicking the button calls translate(direction, input_text) and writes
+    # the returned string to the output box.
+    translate_btn.click(
+        fn=translate,
+        inputs=[direction, input_text],
+        outputs=output_text,
+    )
+    # Footer with model details and links.
+    # NOTE(review): the GitHub link below still contains the placeholder
+    # "yourusername" - replace it with the real repository URL.
+    gr.Markdown(
+        """
+        ---
+        ### 📊 模型信息 | Model Info
+        - **基础模型 | Base Model**: Qwen3-0.6B
+        - **微调方法 | Fine-tuning**: LoRA (rank=16, alpha=32)
+        - **训练数据 | Training Data**: OpenSubtitles (5K samples)
+        - **BLEU Score**: 11.89 (vs 1.24 baseline, +858% improvement)
+        ### 🔗 相关链接 | Links
+        - [GitHub Repository](https://github.com/yourusername/mt-qwen-oral)
+        - [Model Card](https://huggingface.co/Hzzzzx0/qwen3-0.6b-oral-lora)
+        """
+    )
+# Start the Gradio app only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    demo.launch()