Spaces:

ranbac
/

trans

Sleeping

App Files Files Community

trans / app.py

ranbac

Update app.py

5442578 verified 4 days ago

raw

history blame contribute delete

3.36 kB

	import gradio as gr
	import torch
	import os
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Model repository to load and the Hugging Face access token (read from the
	# HF_TOKEN environment variable; None when the variable is not set).
	model_id = "google/translategemma-4b-it"
	hf_token = os.getenv("HF_TOKEN")

	# Options forwarded to the model loader: bfloat16 weights placed on the CPU.
	_model_load_options = {
	    "torch_dtype": torch.bfloat16,
	    "device_map": "cpu",
	    "token": hf_token,
	}

	print("Đang tải mô hình, vui lòng đợi... (có thể mất vài phút trên CPU)")
	tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
	model = AutoModelForCausalLM.from_pretrained(model_id, **_model_load_options)
	print("Tải mô hình thành công!")

	def translate_text(text, direction):
	    """Translate ``text`` between Chinese and Vietnamese.

	    Args:
	        text: Text to translate. ``None``, empty, or whitespace-only input
	            returns a message asking the user to enter text, without
	            touching the model.
	        direction: Label of the selected translation direction. The
	            Chinese-to-Vietnamese label selects zh -> vi; any other value
	            selects vi -> zh.

	    Returns:
	        The decoded translation, or a user-facing error message if building
	        the prompt or generating the output raises an exception.
	    """
	    # The handler may be called without a value; normalise None to "" so the
	    # emptiness check below does not raise AttributeError.
	    cleaned = (text or "").strip()
	    if not cleaned:
	        return "Vui lòng nhập văn bản cần dịch."

	    # Pick the language codes from the user's choice; every label other than
	    # Chinese-to-Vietnamese is treated as Vietnamese-to-Chinese.
	    if direction == "🇨🇳 Trung sang 🇻🇳 Việt":
	        source_lang, target_lang = "zh", "vi"
	    else:
	        source_lang, target_lang = "vi", "zh"

	    # Message structure following the TranslateGemma documentation.
	    messages = [
	        {
	            "role": "user",
	            "content": [
	                {
	                    "type": "text",
	                    "source_lang_code": source_lang,
	                    "target_lang_code": target_lang,
	                    "text": cleaned,
	                }
	            ],
	        }
	    ]

	    try:
	        # Render the chat template and tokenize in a single step. Rendering
	        # to a string and then tokenizing it with default settings adds the
	        # tokenizer's special tokens a second time on top of the ones the
	        # template already emitted (typically a duplicated BOS).
	        inputs = tokenizer.apply_chat_template(
	            messages,
	            tokenize=True,
	            add_generation_prompt=True,
	            return_dict=True,
	            return_tensors="pt",
	        ).to("cpu")

	        # Greedy decoding: translation should be deterministic.
	        with torch.no_grad():
	            outputs = model.generate(
	                **inputs,
	                max_new_tokens=512,
	                do_sample=False,
	            )

	        # generate() returns prompt + continuation; keep only the new tokens.
	        prompt_length = inputs["input_ids"].shape[1]
	        return tokenizer.decode(
	            outputs[0][prompt_length:], skip_special_tokens=True
	        )
	    except Exception as e:
	        # UI boundary: report the failure in the output box instead of
	        # letting the request crash.
	        return f"⚠️ Đã xảy ra lỗi: {str(e)}"

	# Build the Gradio interface: direction selector, input box and button in the
	# left column, read-only result box in the right column.
	# The page title uses a plain "|"; the previous "\|" was an invalid string
	# escape that left a literal backslash in the title.
	with gr.Blocks(title="Dịch Trung - Việt | TranslateGemma") as demo:
	    gr.Markdown("## 🇨🇳🇻🇳 Công cụ dịch Trung - Việt (TranslateGemma-4b-it)")
	    gr.Markdown("Hệ thống chạy trên CPU nên sẽ mất khoảng 10-30s cho mỗi lần dịch.")

	    with gr.Row():
	        with gr.Column():
	            # Radio buttons instead of a free-text field for choosing the
	            # translation direction.
	            direction_radio = gr.Radio(
	                choices=["🇨🇳 Trung sang 🇻🇳 Việt", "🇻🇳 Việt sang 🇨🇳 Trung"],
	                value="🇨🇳 Trung sang 🇻🇳 Việt",
	                label="Chọn chiều dịch",
	            )
	            input_text = gr.Textbox(
	                label="Văn bản cần dịch",
	                lines=6,
	                placeholder="Nhập văn bản vào đây...",
	            )
	            translate_btn = gr.Button("Dịch", variant="primary")

	        with gr.Column():
	            output_text = gr.Textbox(label="Kết quả", lines=9, interactive=False)

	    # Wire the button to the translation function: the text and the chosen
	    # direction go in, the result box receives the return value.
	    translate_btn.click(
	        fn=translate_text,
	        inputs=[input_text, direction_radio],
	        outputs=output_text,
	    )

	# Start the Gradio server only when this file is run as a script, not when
	# it is imported.
	if __name__ == "__main__":
	    demo.launch()