Spaces:

Jay4769
/

Trans

Running

App Files Files Community

Trans / app.py

Jay4769

Update app.py

6a31219 verified 3 days ago

raw

history blame contribute delete

4.81 kB

	import gradio as gr
	import requests
	from bs4 import BeautifulSoup

	from transformers import (
	AutoTokenizer,
	AutoModelForSeq2SeqLM
	)

	# =========================
	# MODELS
	# =========================

	# Russian -> Vietnamese: tokenizer and seq2seq model are loaded once at
	# import time from the same checkpoint name.
	ruvi_model_name = "Helsinki-NLP/opus-mt-ru-vi"
	ruvi_tokenizer = AutoTokenizer.from_pretrained(ruvi_model_name)
	ruvi_model = AutoModelForSeq2SeqLM.from_pretrained(ruvi_model_name)

	# Vietnamese -> Russian: the mirror-image pair for the opposite direction.
	viru_model_name = "Helsinki-NLP/opus-mt-vi-ru"
	viru_tokenizer = AutoTokenizer.from_pretrained(viru_model_name)
	viru_model = AutoModelForSeq2SeqLM.from_pretrained(viru_model_name)

	# =========================
	# WIKTIONARY SCRAPER
	# =========================

	def get_russian_info(word):
	    """Scrape definitions and usage examples for a word from ru.wiktionary.org.

	    Args:
	        word: The word to look up. Leading/trailing whitespace is ignored;
	            ``None`` or a blank string is treated as "nothing to look up".

	    Returns:
	        A ``(definition_text, example_text)`` tuple of strings. Each element
	        holds up to five entries joined by blank lines, or a Vietnamese
	        placeholder message when nothing was found. If the request or the
	        parsing raises, the first element carries the error message instead.
	    """
	    # Imported locally so this function brings its own dependency into scope.
	    from urllib.parse import quote

	    no_definition = "Không tìm thấy định nghĩa"
	    no_example = "Không có ví dụ"

	    word = (word or "").strip()
	    if not word:
	        # An empty title would request the wiki root instead of an entry.
	        return (no_definition, no_example)

	    try:
	        # Percent-encode the title so characters such as "?", "#", "/" or
	        # spaces cannot change which URL is actually requested.
	        title = quote(word, safe="")
	        url = f"https://ru.wiktionary.org/wiki/{title}"

	        response = requests.get(
	            url,
	            headers={"User-Agent": "Mozilla/5.0"},
	            timeout=10,
	        )

	        # A missing page is an ordinary "not found", not an error.
	        if response.status_code == 404:
	            return (no_definition, no_example)
	        # Any other HTTP failure is reported through the except branch
	        # rather than scraping an error page as if it were an entry.
	        response.raise_for_status()

	        soup = BeautifulSoup(response.text, "html.parser")

	        # Definitions: list items of the first two ordered lists, skipping
	        # very short items (20 characters or fewer).
	        definitions = []
	        for ordered_list in soup.find_all("ol")[:2]:
	            for item in ordered_list.find_all("li")[:5]:
	                text = item.get_text(" ", strip=True)
	                if len(text) > 20:
	                    definitions.append(text)

	        # Examples: prefer the dedicated example spans.
	        examples = []
	        for block in soup.find_all("span", class_="example-block")[:5]:
	            text = block.get_text(" ", strip=True)
	            if text:
	                examples.append(text)

	        # Fallback: italic fragments that mention the word itself.
	        if not examples:
	            needle = word.lower()
	            for italic in soup.find_all("i")[:10]:
	                text = italic.get_text(" ", strip=True)
	                if len(text) > 15 and needle in text.lower():
	                    examples.append(text)

	        definition_text = (
	            "\n\n".join(definitions[:5]) if definitions else no_definition
	        )
	        example_text = "\n\n".join(examples[:5]) if examples else no_example

	        return (definition_text, example_text)

	    except Exception as e:
	        # Deliberate best-effort boundary: the UI shows the error text
	        # instead of failing the whole lookup.
	        return (f"Lỗi định nghĩa: {e}", no_example)

	# =========================
	# TRANSLATION FUNCTIONS
	# =========================

	def ru_to_vi(word):
	    """Translate Russian text to Vietnamese and look up its dictionary entry.

	    Args:
	        word: Russian text entered by the user. Surrounding whitespace is
	            ignored.

	    Returns:
	        A ``(translated, definition, example)`` tuple of strings. For blank
	        or ``None`` input all three are empty strings and neither the model
	        nor the dictionary lookup is invoked.
	    """
	    word = (word or "").strip()
	    if not word:
	        # Nothing to translate: skip generation and the network lookup.
	        return ("", "", "")

	    # truncation=True clips over-long input to the tokenizer's maximum
	    # length instead of passing an oversized sequence to the model.
	    inputs = ruvi_tokenizer(word, return_tensors="pt", truncation=True)
	    outputs = ruvi_model.generate(**inputs)
	    translated = ruvi_tokenizer.decode(outputs[0], skip_special_tokens=True)

	    definition, example = get_russian_info(word)

	    return (translated, definition, example)

	def vi_to_ru(word):
	    """Translate Vietnamese text to Russian.

	    Args:
	        word: Vietnamese text entered by the user. Surrounding whitespace is
	            ignored.

	    Returns:
	        The translated string, or an empty string for blank or ``None``
	        input (the model is not invoked in that case).
	    """
	    word = (word or "").strip()
	    if not word:
	        # Nothing to translate: skip generation entirely.
	        return ""

	    # truncation=True clips over-long input to the tokenizer's maximum
	    # length instead of passing an oversized sequence to the model.
	    inputs = viru_tokenizer(word, return_tensors="pt", truncation=True)
	    outputs = viru_model.generate(**inputs)

	    return viru_tokenizer.decode(outputs[0], skip_special_tokens=True)

	# =========================
	# UI
	# =========================

	# Two-tab interface: Russian -> Vietnamese with dictionary details, and
	# Vietnamese -> Russian as plain translation.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🇷🇺⇄🇻🇳 Russian ↔ Vietnamese Dictionary")

	    # Tab 1: translate a Russian entry and show its definition and examples.
	    with gr.Tab("🇷🇺 Nga → Việt"):
	        ru_input = gr.Textbox(placeholder="Nhập tiếng Nga...")
	        vi_output = gr.Textbox(label="🇻🇳 Nghĩa tiếng Việt")
	        definition_output = gr.Textbox(label="📖 Định nghĩa")
	        example_output = gr.Textbox(label="💬 Ví dụ")
	        ru_btn = gr.Button("Tra cứu")

	        # ru_to_vi returns three strings, one per output box, in this order.
	        ru_btn.click(
	            ru_to_vi,
	            inputs=ru_input,
	            outputs=[vi_output, definition_output, example_output],
	        )

	    # Tab 2: translate Vietnamese text into Russian.
	    with gr.Tab("🇻🇳 Việt → Nga"):
	        vi_input = gr.Textbox(placeholder="Nhập tiếng Việt...")
	        ru_output = gr.Textbox(label="🇷🇺 Nghĩa tiếng Nga")
	        vi_btn = gr.Button("Dịch")

	        vi_btn.click(vi_to_ru, inputs=vi_input, outputs=ru_output)

	# Bind to all interfaces rather than only localhost.
	demo.launch(server_name="0.0.0.0")