student2222333051 committed on
Commit
8f89686
·
verified ·
1 Parent(s): ea536a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -85
app.py CHANGED
@@ -1,89 +1,85 @@
1
  import gradio as gr
2
- from transformers import MarianMTModel, MarianTokenizer
3
- from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
4
-
5
- # =============================
6
- # Словарь доступных моделей
7
- # =============================
8
- model_options = {
9
- "Русский → Английский": "Helsinki-NLP/opus-mt-ru-en",
10
- "Английский → Русский": "Helsinki-NLP/opus-mt-en-ru",
11
- "Казахский Русский": "Helsinki-NLP/opus-mt-kk-ru",
12
- "Русский Казахский": "Helsinki-NLP/opus-mt-ru-kk",
13
- }
14
-
15
- # =============================
16
- # Кэш моделей для ускорения
17
- # =============================
18
- model_cache = {}
19
-
20
- def load_translation_model(model_name):
21
- if model_name in model_cache:
22
- return model_cache[model_name]
23
- tokenizer = MarianTokenizer.from_pretrained(model_name)
24
- model = MarianMTModel.from_pretrained(model_name)
25
- model_cache[model_name] = (model, tokenizer)
26
- return model, tokenizer
27
-
28
- # =============================
29
- # Функция перевода
30
- # =============================
31
- def translate_interface(direction, text):
32
- if not text or len(text.strip()) == 0:
33
- return "Введите текст для перевода!"
34
- model_name = model_options[direction]
35
- model, tokenizer = load_translation_model(model_name)
36
- batch = tokenizer([text], return_tensors="pt", padding=True)
37
- gen = model.generate(**batch)
38
- result = tokenizer.batch_decode(gen, skip_special_tokens=True)[0]
39
- return result
40
-
41
- # =============================
42
- # Функция BLEU
43
- # =============================
44
- def bleu_interface(direction, original, reference):
45
- if not original.strip() or not reference.strip():
46
- return "Введите текст и эталон!"
47
- translation = translate_interface(direction, original)
48
- smoothie = SmoothingFunction().method4
49
- ref_tokens = [reference.split()]
50
- hyp_tokens = translation.split()
51
- bleu = sentence_bleu(ref_tokens, hyp_tokens, smoothing_function=smoothie)
52
- return f"BLEU: {bleu:.4f}"
53
-
54
- # =============================
55
- # Gradio UI
56
- # =============================
57
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple", neutral_hue="slate", secondary_hue="pink")) as demo:
58
- gr.Markdown("""
59
- # 🌙 Мощный Машинный Перевод
60
- Тёмная тема, стильный UI, BLEU, множество моделей.
61
- """)
62
-
63
- with gr.Row():
64
- direction = gr.Dropdown(list(model_options.keys()), label="Направление перевода")
65
-
66
- with gr.Row():
67
- input_text = gr.Textbox(lines=4, label="Введите текст")
68
- output_text = gr.Textbox(lines=4, label="Перевод")
69
-
70
- translate_btn = gr.Button("🚀 Перевести")
71
- translate_btn.click(fn=translate_interface, inputs=[direction, input_text], outputs=output_text)
72
-
73
- # Кнопка копирования перевода
74
- copy_btn = gr.Button("📋 Копировать перевод")
75
- copy_btn.click(lambda text: text, inputs=output_text, outputs=output_text)
76
-
77
- gr.Markdown("---")
78
- gr.Markdown("### 📊 BLEU Оценка качества")
79
-
80
- with gr.Row():
81
- original = gr.Textbox(lines=3, label="Исходный текст")
82
- reference = gr.Textbox(lines=3, label="Эталонный перевод")
83
- bleu_out = gr.Textbox(label="BLEU Score")
84
-
85
- bleu_btn = gr.Button("📈 Посчитать BLEU")
86
- bleu_btn.click(fn=bleu_interface, inputs=[direction, original, reference], outputs=bleu_out)
87
 
88
  if __name__ == "__main__":
89
  demo.launch()
 
1
import functools
import re

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
+
6
+
7
+ # --------------------------
8
+ # Очистка текста
9
+ # --------------------------
10
def clean_text(s):
    """Normalize whitespace in *s*.

    Trims leading/trailing whitespace, collapses internal whitespace runs
    to single spaces, and drops any space left dangling before common
    punctuation marks (so "hi , there !" becomes "hi, there!").
    """
    collapsed = re.sub(r"\s+", " ", s.strip())
    return re.sub(r"\s+([,.!?;:])", r"\1", collapsed)
15
+
16
+
17
+ # --------------------------
18
+ # Загрузка модели
19
+ # --------------------------
20
@functools.lru_cache(maxsize=4)
def load_model(model_name):
    """Load the tokenizer/model pair for a Hugging Face checkpoint.

    Results are memoized: the previous revision of this file kept an explicit
    ``model_cache`` dict, but this version dropped it, so every translation
    request re-instantiated the tokenizer and re-loaded the model weights —
    very slow and memory-churning. ``lru_cache`` restores that caching while
    bounding how many checkpoints stay resident at once.

    Parameters:
        model_name: HF hub id of a seq2seq model (hashable, so cacheable).

    Returns:
        ``(tokenizer, model)`` tuple. Note callers share the cached objects.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return tokenizer, model
24
+
25
+
26
+ # --------------------------
27
+ # Перевод
28
+ # --------------------------
29
def translate_text(text, model_name):
    """Translate *text* using the given Hugging Face seq2seq checkpoint.

    Parameters:
        text: source-language string; whitespace-normalized before tokenization.
        model_name: HF hub id of a translation model (passed to ``load_model``).

    Returns:
        The translated string, or ``""`` for empty/whitespace-only input
        (the previous revision guarded empty input; this restores that so we
        never load and run the model on nothing).
    """
    text = clean_text(text)
    if not text:
        # Nothing to translate — skip model loading and generation entirely.
        return ""

    tokenizer, model = load_model(model_name)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    enc = tokenizer([text], return_tensors="pt", padding=True, truncation=True)
    enc = {k: v.to(device) for k, v in enc.items()}

    # Inference only: no_grad avoids building the autograd graph, cutting
    # memory use during beam-search generation.
    with torch.no_grad():
        out = model.generate(**enc, max_length=150, num_beams=4)
    translated = tokenizer.decode(out[0], skip_special_tokens=True)

    return translated
43
+
44
+
45
+ # --------------------------
46
+ # Gradio Interface
47
+ # --------------------------
48
def build_ui():
    """Assemble the Gradio Blocks interface and return it (caller launches)."""
    title = "🌐 Automatic Text Translator"
    description = """
    Лёгкий переводчик на HuggingFace Transformers.
    Выберите модель → введите текст → получите перевод.
    """
    # Available translation checkpoints; the first one is the default.
    checkpoints = [
        "Helsinki-NLP/opus-mt-en-ru",
        "Helsinki-NLP/opus-mt-ru-en",
        "Helsinki-NLP/opus-mt-en-de",
        "Helsinki-NLP/opus-mt-en-fr",
        "Helsinki-NLP/opus-mt-en-kaz",
    ]

    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown(f"# {title}")
        gr.Markdown(description)

        with gr.Row():
            source_box = gr.Textbox(label="Введите текст", placeholder="Hello, how are you?")
            target_box = gr.Textbox(label="Перевод")

        model_dropdown = gr.Dropdown(
            label="Модель перевода",
            value="Helsinki-NLP/opus-mt-en-ru",
            choices=checkpoints,
        )

        run_button = gr.Button("Перевести")

        # Wire the button: (text, checkpoint id) -> translated text.
        run_button.click(translate_text, inputs=[source_box, model_dropdown], outputs=target_box)

    return demo
80
+
81
+
82
# Build the UI at import time so the module exposes a top-level `demo`
# object (NOTE(review): presumably required by the hosting platform, which
# imports the module rather than running it — confirm against deployment).
demo = build_ui()


if __name__ == "__main__":
    # Run as a script: start the local Gradio server.
    demo.launch()