Trans / app.py
Jay4769's picture
Update app.py
6a31219 verified
from urllib.parse import quote

import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)
# =========================
# MODELS
# =========================
# Russian -> Vietnamese: tokenizer and seq2seq model loaded from one checkpoint name.
ruvi_model_name = "Helsinki-NLP/opus-mt-ru-vi"
ruvi_tokenizer = AutoTokenizer.from_pretrained(ruvi_model_name)
ruvi_model = AutoModelForSeq2SeqLM.from_pretrained(ruvi_model_name)

# Vietnamese -> Russian: same loading pattern for the reverse direction.
viru_model_name = "Helsinki-NLP/opus-mt-vi-ru"
viru_tokenizer = AutoTokenizer.from_pretrained(viru_model_name)
viru_model = AutoModelForSeq2SeqLM.from_pretrained(viru_model_name)
# =========================
# WIKTIONARY SCRAPER
# =========================
def _extract_definitions(soup):
    """Return texts of list items longer than 20 characters from the first two <ol> lists."""
    definitions = []
    for ordered_list in soup.find_all("ol")[:2]:
        # Only the first five <li> elements of each list are considered.
        for item in ordered_list.find_all("li")[:5]:
            text = item.get_text(" ", strip=True)
            # Very short items are skipped; the 20-character floor is a heuristic.
            if len(text) > 20:
                definitions.append(text)
    return definitions


def _extract_examples(soup, word):
    """Return example texts, falling back to <i> tags that mention *word*."""
    blocks = soup.find_all("span", class_="example-block")[:5]
    examples = [
        text
        for text in (block.get_text(" ", strip=True) for block in blocks)
        if text
    ]
    if examples:
        return examples

    # Fallback: scan the first ten italic fragments for ones containing the word.
    needle = word.lower()
    for tag in soup.find_all("i")[:10]:
        text = tag.get_text(" ", strip=True)
        if len(text) > 15 and needle in text.lower():
            examples.append(text)
    return examples


def get_russian_info(word):
    """Scrape definitions and usage examples for a word from ru.wiktionary.org.

    Args:
        word: The term to look up. Surrounding whitespace is ignored; an
            empty or missing value short-circuits without a network request.

    Returns:
        A ``(definition_text, example_text)`` tuple of strings. Each element
        joins up to five entries with blank lines, or is a Vietnamese
        placeholder message when nothing was found. Failures are not raised;
        they are reported as an error message in the first element.
    """
    no_definition = "Không tìm thấy định nghĩa"
    no_example = "Không có ví dụ"

    term = (word or "").strip()
    if not term:
        # An empty title does not name any dictionary entry, so skip the request.
        return (no_definition, no_example)

    try:
        # Percent-encode the term so characters like "?" or "#" remain part of
        # the page title instead of being parsed as query string or fragment.
        url = f"https://ru.wiktionary.org/wiki/{quote(term)}"
        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
        )
        if response.status_code == 404:
            # No page under this title: report "not found" rather than
            # scraping the error page.
            return (no_definition, no_example)
        # Any other HTTP error is surfaced through the error message below.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        definitions = _extract_definitions(soup)
        examples = _extract_examples(soup, term)
    except Exception as e:
        # Deliberately broad: this feeds a UI callback, so every failure is
        # turned into a displayable message instead of propagating.
        return (f"Lỗi định nghĩa: {e}", no_example)

    definition_text = "\n\n".join(definitions[:5]) if definitions else no_definition
    example_text = "\n\n".join(examples[:5]) if examples else no_example
    return (definition_text, example_text)
# =========================
# TRANSLATION FUNCTIONS
# =========================
def _translate(text, tokenizer, model):
    """Translate *text* with the given tokenizer/model pair and return the decoded string."""
    # truncation=True caps over-long input at the tokenizer's maximum length
    # instead of passing an oversized sequence to the model.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def ru_to_vi(word):
    """Translate Russian text to Vietnamese and fetch its dictionary entry.

    Args:
        word: Russian text from the UI. Surrounding whitespace is ignored.

    Returns:
        A ``(translation, definition, example)`` tuple of strings. All three
        are empty strings when the input is empty or whitespace-only, in
        which case neither the model nor the dictionary lookup is invoked.
    """
    text = (word or "").strip()
    if not text:
        return ("", "", "")

    translated = _translate(text, ruvi_tokenizer, ruvi_model)
    definition, example = get_russian_info(text)
    return (translated, definition, example)


def vi_to_ru(word):
    """Translate Vietnamese text to Russian.

    Args:
        word: Vietnamese text from the UI. Surrounding whitespace is ignored.

    Returns:
        The translated string, or an empty string when the input is empty or
        whitespace-only (the model is not invoked in that case).
    """
    text = (word or "").strip()
    if not text:
        return ""

    return _translate(text, viru_tokenizer, viru_model)
# =========================
# UI
# =========================
with gr.Blocks() as demo:
    gr.Markdown("# 🇷🇺⇄🇻🇳 Russian ↔ Vietnamese Dictionary")

    # Tab 1: Russian input -> Vietnamese translation, definition and examples.
    with gr.Tab("🇷🇺 Nga → Việt"):
        ru_input = gr.Textbox(placeholder="Nhập tiếng Nga...")
        vi_output = gr.Textbox(label="🇻🇳 Nghĩa tiếng Việt")
        definition_output = gr.Textbox(label="📖 Định nghĩa")
        example_output = gr.Textbox(label="💬 Ví dụ")
        ru_btn = gr.Button("Tra cứu")
        # ru_to_vi returns three values, one per output box, in this order.
        ru_btn.click(
            fn=ru_to_vi,
            inputs=ru_input,
            outputs=[vi_output, definition_output, example_output],
        )

    # Tab 2: Vietnamese input -> Russian translation only.
    with gr.Tab("🇻🇳 Việt → Nga"):
        vi_input = gr.Textbox(placeholder="Nhập tiếng Việt...")
        ru_output = gr.Textbox(label="🇷🇺 Nghĩa tiếng Nga")
        vi_btn = gr.Button("Dịch")
        vi_btn.click(fn=vi_to_ru, inputs=vi_input, outputs=ru_output)

# Bind to all interfaces so the app is reachable from outside the host/container.
demo.launch(server_name="0.0.0.0")