Spaces:

eoeooe
/

Testmodel

Runtime error

File size: 5,108 Bytes

ed46fb8
 
 
c2fc27a
ed46fb8
c2fc27a
 
 
 
 
ed46fb8
 
c2fc27a
ed46fb8
 
c2fc27a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed46fb8
 
c2fc27a
 
ed46fb8
 
c2fc27a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed46fb8
c2fc27a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed46fb8
 
c2fc27a

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re

# Use the Typhoon Translate 4B model (the author's pick for EN/TH translation).
# The author suggests trying "scb10x/typhoon-1.5x-1b-chat" for a smaller model.
model_name = "typhoon-ai/typhoon-translate-4b"

print(f"Loading model: {model_name}")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    _weights_dtype = torch.float16
else:
    _weights_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=_weights_dtype,
)
# Switch to inference mode (model.eval()).
model.eval()

def translate_to_thai(text, source_lang="English"):
    """Translate ``text`` from ``source_lang`` into natural, manga-style Thai.

    Builds a system + user chat, renders it with the tokenizer's chat
    template, samples up to 150 new tokens from the module-level ``model``
    and returns only the newly generated text.

    Args:
        text: Source text to translate. ``None``, empty or whitespace-only
            input short-circuits with a Thai "please enter text" message.
        source_lang: Name of the source language, interpolated into the
            user prompt (the UI offers "English" and "Japanese").

    Returns:
        The decoded model output with surrounding whitespace stripped, or a
        Thai fallback message when the model produced nothing.
    """
    # Guard against None as well as blank input so the UI never crashes here.
    if not text or not text.strip():
        return "กรุณาใส่ข้อความที่จะแปล"

    # System prompt tuned for manga dialogue; asks for Thai-only output.
    system_prompt = """คุณคือนักแปลมังงะ/การ์ตูนมืออาชีพ แปลข้อความจากภาษาต้นทางเป็นภาษาไทยที่เป็นธรรมชาติ อ่านง่าย สไตล์บทสนทนามังงะ ใช้ภาษาพูดเหมาะกับตัวละคร รักษาความหมายเดิม 100% อย่าเพิ่มหรือตัดเนื้อหา

ส่งคืนเฉพาะข้อความภาษาไทยล้วน ๆ เท่านั้น ห้ามมีคำอธิบาย ห้ามมี "แปลว่า" หรือเครื่องหมายพิเศษ ห้ามตอบภาษาอื่น"""

    user_prompt = f"แปลข้อความภาษา{source_lang}นี้เป็นไทย:\n{text}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    # Ask for a dict so generate() receives the attention mask together with
    # the input ids, and so the prompt length is known exactly.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=150,  # headroom for long speech bubbles
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
        )

    # The returned sequence starts with the prompt tokens; slice them off by
    # position instead of pattern-matching the decoded text, which would
    # mangle any translation containing ":" or the word "assistant".
    generated_ids = output_ids[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    return response if response else "แปลไม่ได้ ลองข้อความอื่นหรือปรับ prompt"

# Gradio interface: a text box plus a source-language dropdown feeding
# translate_to_thai, with a single text box for the Thai result.
demo = gr.Interface(
    fn=translate_to_thai,
    inputs=[
        gr.Textbox(
            label="ใส่ข้อความที่จะแปล (English หรือ Japanese จากมังงะ)",
            placeholder="เช่น: Damn it! Run away!! หรือ くそっ！逃げろ！！",
            lines=5
        ),
        gr.Dropdown(
            choices=["English", "Japanese"],
            value="English",
            label="ภาษาต้นทาง"
        )
    ],
    outputs=gr.Textbox(label="แปลเป็นภาษาไทย (ธรรมชาติ สไตล์มังงะ)"),
    title="Manga Translator: English/Japanese → Thai",
    description="""ใช้โมเดล Typhoon Translate 4B แปลบทสนทนามังงะเป็นไทยธรรมชาติ รัน local/offline ได้ ส่งคืนเฉพาะข้อความไทยล้วน
    - ตัวอย่าง: "What the hell?!" → "เฮ้ย อะไรวะเนี่ย!"
    - ถ้าเพี้ยน ลองเพิ่ม context เช่น "ตัวเอกพูดห้าว ๆ:" นำหน้า text
    """,
    # Each example is [text, source language], matching the two inputs above.
    examples=[
        ["Damn it! We need to run now!!", "English"],
        ["くそっ！逃げろ！！", "Japanese"],
        # Plain "~": the previous "\~" was an invalid escape sequence that
        # left a stray backslash in the example text.
        ["Ohayo! Kyou mo kawaii ne~", "Japanese"],
        ["This is the best day ever!", "English"]
    ],
    theme=gr.themes.Soft(primary_hue="blue")
)

# Launch the app, listening on all interfaces (0.0.0.0) at port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)