Spaces:

Havyle
/

VSL-Translation-Demo

Sleeping

App Files Files Community

VSL-Translation-Demo / app.py

Havyle

Update app.py

37cbc4d verified about 1 month ago

raw

history blame contribute delete

9.49 kB

	import torch
	import gradio as gr
	from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

	# ==========================================
	# 1. CONFIGURATION & MODEL LOADING (BACKEND)
	# ==========================================
	# Target device for the translation model: CUDA when torch reports it is
	# available, otherwise CPU.
	DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

	# Bind every model handle up front so the names always exist, even when a
	# load below fails part-way; the handler functions compare them to None.
	asr_pipeline = None
	trans_tokenizer = None
	trans_model = None

	# --- Load PhoWhisper (speech recognition) ---
	print("Đang tải model PhoWhisper...")
	try:
	    asr_pipeline = pipeline(
	        "automatic-speech-recognition",
	        model="vinai/PhoWhisper-small",
	        # Same decision as DEVICE, expressed as the integer index the
	        # original code passed here (0 when CUDA is available, else -1).
	        device=0 if DEVICE == "cuda" else -1,
	    )
	except Exception as e:
	    # Best effort: report the failure and keep the app bootable so the
	    # text-only path can still work; asr_pipeline stays None.
	    print(f"Lỗi load PhoWhisper: {e}")

	# --- Load the ViT5 translation model ---
	# Path handed to from_pretrained for both the tokenizer and the model.
	TRANSLATION_MODEL_PATH = "."

	print("Đang tải model dịch...")
	try:
	    trans_tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_MODEL_PATH)
	    trans_model = AutoModelForSeq2SeqLM.from_pretrained(TRANSLATION_MODEL_PATH).to(DEVICE)
	    print("Load model dịch thành công!")
	except Exception as e:
	    # Best effort: report the failure and leave both handles as None so
	    # callers see one consistent "not loaded" state.
	    print(f"Lỗi load model dịch: {e}")
	    trans_tokenizer = None
	    trans_model = None

	# ==========================================
	# 2. HÀM XỬ LÝ LOGIC
	# ==========================================
	def speech_to_text(audio_path):
	    """Transcribe an audio file with the module-level ASR pipeline.

	    Args:
	        audio_path: Value passed straight to ``asr_pipeline``; ``None``
	            means no audio was supplied.

	    Returns:
	        The ``"text"`` field of the pipeline result, ``""`` when
	        ``audio_path`` is ``None``, or a Vietnamese error message when the
	        pipeline is not loaded or the recognition call raises.
	    """
	    if audio_path is None:
	        return ""
	    if asr_pipeline is None:
	        return "Lỗi: Chưa load được PhoWhisper."

	    try:
	        transcript = asr_pipeline(audio_path)["text"]
	    except Exception as err:
	        # Failures are reported as text so the UI shows a message.
	        return f"Lỗi nhận dạng: {err!s}"
	    return transcript

	def text_to_gloss(vietnamese_text):
	    """Translate Vietnamese text into VSL gloss with the seq2seq model.

	    Args:
	        vietnamese_text: Sentence to translate; may be ``None`` or empty.

	    Returns:
	        The decoded model output with a single leading ``"vsl: "`` tag
	        removed, ``""`` when the input is empty or only whitespace, or a
	        Vietnamese error message when the translation model is not loaded.
	    """
	    # Treat None, "" and whitespace-only input alike: nothing to translate.
	    cleaned_text = vietnamese_text.strip() if vietnamese_text else ""
	    if not cleaned_text:
	        return ""
	    if trans_model is None:
	        return "Lỗi: Chưa load được model dịch."

	    # The source sentence is sent to the model behind a "vi: " tag.
	    input_text = f"vi: {cleaned_text}"
	    inputs = trans_tokenizer(
	        input_text,
	        return_tensors="pt",
	        max_length=128,
	        truncation=True,
	    ).to(DEVICE)

	    # Inference only: no gradients are needed for beam search.
	    with torch.no_grad():
	        outputs = trans_model.generate(
	            inputs["input_ids"],
	            max_length=128,
	            num_beams=5,
	            early_stopping=True,
	        )

	    gloss_text = trans_tokenizer.decode(outputs[0], skip_special_tokens=True)

	    # Drop only the leading "vsl: " tag; str.replace would also delete any
	    # later occurrence inside the gloss itself.
	    gloss_prefix = "vsl: "
	    if gloss_text.startswith(gloss_prefix):
	        return gloss_text[len(gloss_prefix):]
	    return gloss_text

	def full_pipeline(audio, text_input, mode):
	    """Run the speech/text -> Vietnamese -> VSL gloss flow for one request.

	    Args:
	        audio: Audio value forwarded to ``speech_to_text``, or ``None``.
	        text_input: Text typed by the user, or ``None``/empty.
	        mode: ``"Giọng nói (Microphone/File)"`` for the audio path; any
	            other value is handled as the text path.

	    Returns:
	        A ``(vietnamese_text, gloss_text)`` tuple. When the required input
	        is missing, or speech recognition reports an error, the first item
	        carries the message and the second is ``""``.
	    """
	    audio_mode = "Giọng nói (Microphone/File)"
	    text_mode = "Văn bản (Nhập tay)"
	    # Prefixes of the error strings speech_to_text returns instead of raising.
	    asr_error_prefixes = ("Lỗi:", "Lỗi nhận dạng:")

	    if mode == audio_mode:
	        if audio is None:
	            return "⚠️ Vui lòng thu âm hoặc tải file.", ""
	        vietnamese_output = speech_to_text(audio)
	        # An ASR failure is a message for the user, not a sentence to
	        # translate, so show it and skip the gloss step.
	        if vietnamese_output.startswith(asr_error_prefixes):
	            return vietnamese_output, ""
	    else:
	        # Whitespace-only text counts as empty input.
	        typed_text = text_input.strip() if text_input else ""
	        if mode != text_mode or not typed_text:
	            return "⚠️ Vui lòng nhập văn bản.", ""
	        vietnamese_output = typed_text

	    gloss_output = text_to_gloss(vietnamese_output)
	    return vietnamese_output, gloss_output

	# ==========================================
	# 3. COFFEE THEME INTERFACE (UI/UX)
	# ==========================================

	# Coffee-toned stylesheet handed to gr.Blocks(css=...). It styles the header
	# banner (.header-container, .uni-name, .project-name, .author-name), the
	# action buttons (.primary-btn), section labels (.label-text), the gloss
	# output textarea (.gloss-box) and the page footer (.footer).
	custom_css = """
	@import url('https://fonts.googleapis.com/css2?family=Lora:ital,wght@0,400;0,700;1,400&family=Roboto:wght@300;400;500&display=swap');

	body {
	font-family: 'Roboto', sans-serif;
	background-color: #fdfbf7;
	}

	/* --- HEADER --- */
	.header-container {
	background: linear-gradient(135deg, #3e2723 0%, #5d4037 100%);
	padding: 30px;
	border-radius: 15px;
	text-align: center;
	box-shadow: 0 6px 12px rgba(62, 39, 35, 0.3);
	margin-bottom: 25px;
	border-bottom: 4px solid #8d6e63;
	}

	/* Ép màu trắng bằng !important */
	.uni-name {
	font-family: 'Roboto', sans-serif;
	font-size: 16px;
	text-transform: uppercase;
	letter-spacing: 2px;
	color: #ffffff !important; /* Màu trắng */
	font-weight: bold;
	opacity: 0.9;
	}

	.project-name {
	font-family: 'Lora', serif;
	font-size: 32px;
	font-weight: 700;
	margin: 15px 0;
	text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
	color: #ffffff !important; /* Màu trắng */
	}

	.author-name {
	font-size: 14px;
	font-style: italic;
	color: #ffffff !important; /* Màu trắng */
	border-top: 1px solid rgba(255,255,255,0.3);
	display: inline-block;
	padding-top: 10px;
	margin-top: 5px;
	}

	/* --- BUTTONS --- */
	button.primary-btn {
	background-color: #6d4c41 !important;
	color: white !important;
	font-weight: 600;
	border-radius: 8px;
	border: none;
	transition: all 0.3s;
	}

	button.primary-btn:hover {
	background-color: #5d4037 !important;
	transform: translateY(-2px);
	box-shadow: 0 6px 10px rgba(109, 76, 65, 0.3);
	}

	/* --- OTHER --- */
	.label-text {
	font-weight: bold;
	color: #4e342e;
	margin-bottom: 5px;
	font-size: 1.1em;
	}

	.gloss-box textarea {
	font-family: 'Roboto', sans-serif;
	font-size: 22px !important;
	font-weight: bold;
	color: #bf360c !important;
	background-color: #fff3e0 !important;
	border: 1px solid #ffccbc !important;
	}

	.footer {
	text-align: center;
	color: #8d6e63;
	font-size: 12px;
	margin-top: 40px;
	border-top: 1px solid #d7ccc8;
	padding-top: 20px;
	}
	"""

	# Build the theme in two steps: start from Gradio's Soft preset with warm
	# hues, then override individual style variables with the coffee palette.
	_soft_base_theme = gr.themes.Soft(
	    primary_hue="orange",
	    secondary_hue="yellow",
	    neutral_hue="gray",
	)
	coffee_theme = _soft_base_theme.set(
	    # Page and block surfaces.
	    body_background_fill="#fcf9f2",
	    block_background_fill="#ffffff",
	    block_border_width="1px",
	    block_shadow="0 2px 4px rgba(0,0,0,0.05)",
	    # Primary buttons use the same browns as the custom CSS.
	    button_primary_background_fill="#6d4c41",
	    button_primary_background_fill_hover="#5d4037",
	    button_primary_text_color="white",
	    slider_color="#8d6e63",
	)

	# Two-column Gradio layout: inputs (audio tab / text tab) on the left,
	# recognised Vietnamese text and the VSL gloss on the right.
	with gr.Blocks(css=custom_css, theme=coffee_theme, title="VSL Coffee Translator") as demo:

	    # --- HEADER ---
	    # A plain "|" separates student and supervisor; the previous "\|" was an
	    # invalid escape sequence that rendered a stray backslash.
	    gr.HTML(
	        """
	        <div class="header-container">
	            <div class="uni-name">Trường Đại học Kinh tế - Đại học Đà Nẵng</div>
	            <div class="project-name">☕ HỆ THỐNG DỊCH VSL - ANGEL COFFEE ☕</div>
	            <div class="author-name">Sinh viên: Lê Thị Hà Vy | GVHD: Th.S Nguyễn Văn Chức</div>
	        </div>
	        """
	    )

	    # --- MAIN CONTENT ---
	    with gr.Row():

	        # LEFT COLUMN: INPUT
	        with gr.Column(scale=1, variant="panel"):
	            gr.Markdown("### 🎙️ Nhập yêu cầu gọi món", elem_classes="label-text")

	            with gr.Tabs():
	                # Audio tab
	                with gr.TabItem("Ghi âm / Tải file"):
	                    input_audio = gr.Audio(
	                        sources=["microphone", "upload"],
	                        type="filepath",
	                        label="Nói câu gọi món...",
	                    )
	                    # Constant mode value fed to full_pipeline by this tab's button.
	                    mode_audio = gr.State(value="Giọng nói (Microphone/File)")
	                    btn_audio = gr.Button(
	                        "☕ Xử lý Giọng nói",
	                        variant="primary",
	                        elem_classes="primary-btn",
	                    )

	                # Text tab
	                with gr.TabItem("Nhập văn bản"):
	                    input_text = gr.Textbox(
	                        label="Nhập câu tiếng Việt",
	                        placeholder="Ví dụ: Cho tôi một ly cà phê sữa ít đường...",
	                        lines=4,
	                    )
	                    # Constant mode value fed to full_pipeline by this tab's button.
	                    mode_text = gr.State(value="Văn bản (Nhập tay)")

	                    # Sample sentences that fill the textbox when clicked.
	                    gr.Examples(
	                        examples=[
	                            ["Tôi gọi một ly cà phê muối"],
	                            ["Lấy cho mình một bạc xỉu ít ngọt"],
	                            ["Tôi muốn thanh toán tiền"],
	                            ["Cảm ơn bạn rất nhiều"],
	                        ],
	                        inputs=input_text,
	                        label="Gợi ý gọi món",
	                    )

	                    btn_text = gr.Button(
	                        "☕ Dịch Văn bản",
	                        variant="primary",
	                        elem_classes="primary-btn",
	                    )

	        # RIGHT COLUMN: OUTPUT
	        with gr.Column(scale=1, variant="panel"):
	            gr.Markdown("### ✨ Kết quả dịch (VSL Gloss)", elem_classes="label-text")

	            # Intermediate result: the Vietnamese text
	            gr.Label("Bước 1: Nhận dạng Tiếng Việt", show_label=False, color="orange")
	            output_vi = gr.Textbox(
	                label="Văn bản Tiếng Việt",
	                interactive=False,
	                show_copy_button=True,
	                lines=2,
	            )

	            gr.HTML("<br>")

	            # Final result: the VSL gloss
	            gr.Label("Bước 2: Cú pháp Ký hiệu (VSL)", show_label=False, color="yellow")
	            output_gloss = gr.Textbox(
	                label="VSL Gloss Output",
	                interactive=False,
	                show_copy_button=True,
	                elem_classes="gloss-box",
	                lines=3,
	            )

	    # --- FOOTER ---
	    gr.HTML(
	        """
	        <div class="footer">
	            Dự án hỗ trợ giao tiếp cho người khiếm thính tại Angel Coffee Đà Nẵng.<br>
	            Powered by <b>PhoWhisper</b> & <b>ViT5</b> (Fine-tuned on Coffee Dataset).
	        </div>
	        """
	    )

	    # --- EVENT HANDLERS ---
	    # Both buttons call the same handler with both inputs; only the mode
	    # state differs and decides which input full_pipeline reads.
	    btn_audio.click(
	        fn=full_pipeline,
	        inputs=[input_audio, input_text, mode_audio],
	        outputs=[output_vi, output_gloss],
	    )

	    btn_text.click(
	        fn=full_pipeline,
	        inputs=[input_audio, input_text, mode_text],
	        outputs=[output_vi, output_gloss],
	    )

	# Start the Gradio app only when this file is executed as a script, not
	# when it is imported as a module.
	if __name__ == "__main__":
	    demo.launch()