Jay4769 committed on
Commit
db77a3e
·
verified ·
1 Parent(s): b371da3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -42
app.py CHANGED
@@ -1,32 +1,97 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 
 
 
 
 
 
 
 
 
3
 
4
  # RU -> VI
5
  ruvi_model_name = "Helsinki-NLP/opus-mt-ru-vi"
6
- ruvi_tokenizer = AutoTokenizer.from_pretrained(ruvi_model_name)
7
- ruvi_model = AutoModelForSeq2SeqLM.from_pretrained(ruvi_model_name)
 
 
 
 
 
 
8
 
9
  # VI -> RU
10
  viru_model_name = "Helsinki-NLP/opus-mt-vi-ru"
11
- viru_tokenizer = AutoTokenizer.from_pretrained(viru_model_name)
12
- viru_model = AutoModelForSeq2SeqLM.from_pretrained(viru_model_name)
13
-
14
- # Simple synonym databases
15
- ru_synonyms = {
16
- "хороший": ["отличный", "прекрасный", "добрый"],
17
- "плохой": ["ужасный", "скверный"],
18
- "большой": ["огромный", "крупный"]
19
- }
20
-
21
- vi_synonyms = {
22
- "đẹp": ["xinh", "đẹp đẽ", "xinh đẹp"],
23
- "lớn": ["to", "khổng lồ", "rộng"],
24
- "tốt": ["hay", "ổn", "tuyệt"]
25
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  def ru_to_vi(word):
28
 
29
- inputs = ruvi_tokenizer(word, return_tensors="pt")
 
 
 
30
 
31
  outputs = ruvi_model.generate(**inputs)
32
 
@@ -35,16 +100,17 @@ def ru_to_vi(word):
35
  skip_special_tokens=True
36
  )
37
 
38
- syns = ru_synonyms.get(
39
- word.lower(),
40
- ["Không tìm thấy"]
41
- )
42
 
43
- return translated, ", ".join(syns)
44
 
45
  def vi_to_ru(word):
46
 
47
- inputs = viru_tokenizer(word, return_tensors="pt")
 
 
 
48
 
49
  outputs = viru_model.generate(**inputs)
50
 
@@ -53,16 +119,22 @@ def vi_to_ru(word):
53
  skip_special_tokens=True
54
  )
55
 
56
- syns = vi_synonyms.get(
57
- word.lower(),
58
- ["Không tìm thấy"]
59
- )
60
 
61
- return translated, ", ".join(syns)
 
 
 
62
 
63
  with gr.Blocks() as demo:
64
 
65
- gr.Markdown("# 🇷🇺⇄🇻🇳 Russian ↔ Vietnamese Dictionary")
 
 
 
 
 
 
66
 
67
  with gr.Tab("🇷🇺 Nga → Việt"):
68
 
@@ -71,7 +143,7 @@ with gr.Blocks() as demo:
71
  )
72
 
73
  vi_output = gr.Textbox(
74
- label="🇻🇳 Nghĩa"
75
  )
76
 
77
  ru_syn_output = gr.Textbox(
@@ -89,6 +161,10 @@ with gr.Blocks() as demo:
89
  ]
90
  )
91
 
 
 
 
 
92
  with gr.Tab("🇻🇳 Việt → Nga"):
93
 
94
  vi_input = gr.Textbox(
@@ -96,22 +172,15 @@ with gr.Blocks() as demo:
96
  )
97
 
98
  ru_output = gr.Textbox(
99
- label="🇷🇺 Nghĩa"
100
  )
101
 
102
- vi_syn_output = gr.Textbox(
103
- label="📚 Từ đồng nghĩa"
104
- )
105
-
106
- vi_btn = gr.Button("Tra cứu")
107
 
108
  vi_btn.click(
109
  vi_to_ru,
110
  inputs=vi_input,
111
- outputs=[
112
- ru_output,
113
- vi_syn_output
114
- ]
115
  )
116
 
117
  demo.launch(server_name="0.0.0.0")
 
1
  import gradio as gr
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+
5
+ from transformers import (
6
+ AutoTokenizer,
7
+ AutoModelForSeq2SeqLM
8
+ )
9
+
10
+ # =========================
11
+ # MODELS
12
+ # =========================
13
 
14
  # RU -> VI
15
  ruvi_model_name = "Helsinki-NLP/opus-mt-ru-vi"
16
+
17
+ ruvi_tokenizer = AutoTokenizer.from_pretrained(
18
+ ruvi_model_name
19
+ )
20
+
21
+ ruvi_model = AutoModelForSeq2SeqLM.from_pretrained(
22
+ ruvi_model_name
23
+ )
24
 
25
  # VI -> RU
26
  viru_model_name = "Helsinki-NLP/opus-mt-vi-ru"
27
+
28
+ viru_tokenizer = AutoTokenizer.from_pretrained(
29
+ viru_model_name
30
+ )
31
+
32
+ viru_model = AutoModelForSeq2SeqLM.from_pretrained(
33
+ viru_model_name
34
+ )
35
+
36
+ # =========================
37
+ # SYNONYM FUNCTIONS
38
+ # =========================
39
+
40
def get_ru_synonyms(word):
    """Look up candidate Russian synonyms for *word* on sinonim.org.

    Requests ``https://sinonim.org/s/<word>`` and collects the stripped text
    of the anchor tags in the response, dropping entries of one character or
    less, the query word itself (case-insensitive) and duplicates (the first
    occurrence is kept, order preserved).

    Args:
        word: The word typed by the user.

    Returns:
        A comma-separated string with at most 10 candidates;
        ``"Không tìm thấy"`` when the input is blank, the page does not
        exist, or no candidate is left after filtering; and
        ``"Lỗi tìm synonym"`` when the request or the parsing fails.
    """
    # Function-scope import so the block does not depend on the file header.
    from urllib.parse import quote

    query = (word or "").strip()
    if not query:
        # Nothing to look up: avoid requesting the bare "/s/" page, whose
        # links would otherwise be reported as synonyms.
        return "Không tìm thấy"

    try:
        # Percent-encode the user input so characters such as "/", "?" or
        # "#" cannot change which path is requested.
        url = f"https://sinonim.org/s/{quote(query, safe='')}"

        response = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0"},
            timeout=10,
        )

        if response.status_code == 404:
            return "Không tìm thấy"
        # Any other HTTP error is a failure, not a page worth scraping.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")

        # NOTE(review): every <a> on the page is considered, which
        # presumably includes navigation links; narrow the selector once
        # the page markup has been confirmed.
        needle = query.lower()
        link_texts = (a.text.strip() for a in soup.select("a"))
        # dict.fromkeys de-duplicates while keeping first-seen order.
        syns = list(
            dict.fromkeys(
                text
                for text in link_texts
                if len(text) > 1 and text.lower() != needle
            )
        )

        if not syns:
            return "Không tìm thấy"

        return ", ".join(syns[:10])

    except Exception:
        # Deliberate best-effort boundary: the UI callback must always get
        # a string back, whatever went wrong while fetching or parsing.
        return "Lỗi tìm synonym"
83
+
84
+
85
+ # =========================
86
+ # TRANSLATION FUNCTIONS
87
+ # =========================
88
 
89
  def ru_to_vi(word):
90
 
91
+ inputs = ruvi_tokenizer(
92
+ word,
93
+ return_tensors="pt"
94
+ )
95
 
96
  outputs = ruvi_model.generate(**inputs)
97
 
 
100
  skip_special_tokens=True
101
  )
102
 
103
+ syns = get_ru_synonyms(word)
104
+
105
+ return translated, syns
 
106
 
 
107
 
108
  def vi_to_ru(word):
109
 
110
+ inputs = viru_tokenizer(
111
+ word,
112
+ return_tensors="pt"
113
+ )
114
 
115
  outputs = viru_model.generate(**inputs)
116
 
 
119
  skip_special_tokens=True
120
  )
121
 
122
+ return translated
 
 
 
123
 
124
+
125
+ # =========================
126
+ # UI
127
+ # =========================
128
 
129
  with gr.Blocks() as demo:
130
 
131
+ gr.Markdown(
132
+ "# 🇷🇺⇄🇻🇳 Russian ↔ Vietnamese Dictionary"
133
+ )
134
+
135
+ # -------------------------
136
+ # RU -> VI
137
+ # -------------------------
138
 
139
  with gr.Tab("🇷🇺 Nga → Việt"):
140
 
 
143
  )
144
 
145
  vi_output = gr.Textbox(
146
+ label="🇻🇳 Nghĩa tiếng Việt"
147
  )
148
 
149
  ru_syn_output = gr.Textbox(
 
161
  ]
162
  )
163
 
164
+ # -------------------------
165
+ # VI -> RU
166
+ # -------------------------
167
+
168
  with gr.Tab("🇻🇳 Việt → Nga"):
169
 
170
  vi_input = gr.Textbox(
 
172
  )
173
 
174
  ru_output = gr.Textbox(
175
+ label="🇷🇺 Nghĩa tiếng Nga"
176
  )
177
 
178
+ vi_btn = gr.Button("Dịch")
 
 
 
 
179
 
180
  vi_btn.click(
181
  vi_to_ru,
182
  inputs=vi_input,
183
+ outputs=ru_output
 
 
 
184
  )
185
 
186
  demo.launch(server_name="0.0.0.0")