from urllib.parse import quote

import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)
|
|
| |
| |
| |
|
|
| |
# Helsinki-NLP MarianMT checkpoints for the two translation directions.
ruvi_model_name = "Helsinki-NLP/opus-mt-ru-vi"
viru_model_name = "Helsinki-NLP/opus-mt-vi-ru"

# Russian -> Vietnamese tokenizer/model pair, loaded once at import time.
ruvi_tokenizer = AutoTokenizer.from_pretrained(ruvi_model_name)
ruvi_model = AutoModelForSeq2SeqLM.from_pretrained(ruvi_model_name)

# Vietnamese -> Russian tokenizer/model pair.
viru_tokenizer = AutoTokenizer.from_pretrained(viru_model_name)
viru_model = AutoModelForSeq2SeqLM.from_pretrained(viru_model_name)
|
|
| |
| |
| |
|
|
def get_russian_info(word):
    """Scrape ru.wiktionary.org for definitions and usage examples of *word*.

    Parameters
    ----------
    word : str
        Russian word to look up; surrounding whitespace is ignored.

    Returns
    -------
    tuple[str, str]
        ``(definition_text, example_text)``. On any failure the first
        element carries a Vietnamese error message and the second the
        "no examples" placeholder.
    """
    try:
        word = word.strip()
        # Percent-encode the word so spaces or slashes in user input cannot
        # break the URL path (bare Cyrillic would be encoded by requests,
        # but reserved characters would not).
        url = f"https://ru.wiktionary.org/wiki/{quote(word)}"
        headers = {"User-Agent": "Mozilla/5.0"}

        response = requests.get(url, headers=headers, timeout=10)
        # Fail fast on 404/5xx instead of silently scraping an error page;
        # the raised HTTPError is surfaced by the except-handler below.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # Definitions: list items from the first two ordered lists.
        # Very short items are usually template noise, so require > 20 chars.
        definitions = []
        for ol in soup.find_all("ol")[:2]:
            for item in ol.find_all("li")[:5]:
                text = item.get_text(" ", strip=True)
                if len(text) > 20:
                    definitions.append(text)

        # Examples: Wiktionary marks quotations with span.example-block.
        examples = []
        for q in soup.find_all("span", class_="example-block")[:5]:
            txt = q.get_text(" ", strip=True)
            if txt:
                examples.append(txt)

        # Fallback: italicized snippets that actually contain the word.
        if not examples:
            for i in soup.find_all("i")[:10]:
                txt = i.get_text(" ", strip=True)
                if len(txt) > 15 and word.lower() in txt.lower():
                    examples.append(txt)

        definition_text = (
            "\n\n".join(definitions[:5])
            if definitions
            else "Không tìm thấy định nghĩa"
        )
        example_text = (
            "\n\n".join(examples[:5])
            if examples
            else "Không có ví dụ"
        )
        return definition_text, example_text

    except Exception as e:
        # Top-level boundary for the UI: report the error as text rather
        # than crash the Gradio callback.
        return f"Lỗi định nghĩa: {str(e)}", "Không có ví dụ"
|
|
| |
| |
| |
|
|
def ru_to_vi(word):
    """Translate Russian *word* to Vietnamese and fetch dictionary info.

    Returns ``(translation, definitions, examples)`` matching the three
    Gradio output textboxes of the Nga → Việt tab.
    """
    encoded = ruvi_tokenizer(word, return_tensors="pt")
    generated = ruvi_model.generate(**encoded)
    translated = ruvi_tokenizer.decode(generated[0], skip_special_tokens=True)

    # Definitions and examples come from the Wiktionary scraper.
    definition, example = get_russian_info(word)
    return translated, definition, example
|
|
def vi_to_ru(word):
    """Translate Vietnamese *word* to Russian via the opus-mt-vi-ru model."""
    encoded = viru_tokenizer(word, return_tensors="pt")
    generated = viru_model.generate(**encoded)
    return viru_tokenizer.decode(generated[0], skip_special_tokens=True)
|
|
| |
| |
| |
|
|
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🇷🇺⇄🇻🇳 Russian ↔ Vietnamese Dictionary")

    # Russian -> Vietnamese tab: translation plus scraped dictionary data.
    with gr.Tab("🇷🇺 Nga → Việt"):
        ru_input = gr.Textbox(placeholder="Nhập tiếng Nga...")
        vi_output = gr.Textbox(label="🇻🇳 Nghĩa tiếng Việt")
        definition_output = gr.Textbox(label="📖 Định nghĩa")
        example_output = gr.Textbox(label="💬 Ví dụ")
        ru_btn = gr.Button("Tra cứu")
        ru_btn.click(
            ru_to_vi,
            inputs=ru_input,
            outputs=[vi_output, definition_output, example_output],
        )

    # Vietnamese -> Russian tab: translation only.
    with gr.Tab("🇻🇳 Việt → Nga"):
        vi_input = gr.Textbox(placeholder="Nhập tiếng Việt...")
        ru_output = gr.Textbox(label="🇷🇺 Nghĩa tiếng Nga")
        vi_btn = gr.Button("Dịch")
        vi_btn.click(vi_to_ru, inputs=vi_input, outputs=ru_output)

# 0.0.0.0 makes the server reachable from outside the host/container.
demo.launch(server_name="0.0.0.0")