entropy25 committed on
Commit
847f4f3
·
verified ·
1 Parent(s): e50136c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -78
app.py CHANGED
@@ -1,63 +1,60 @@
 
 
1
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
2
  from peft import PeftModel
3
 
4
- BASE_MODEL = "facebook/nllb-200-distilled-600M"
5
- ADAPTER_NO_TO_EN = "entropy25/mt_en_no_oil"
6
- #ADAPTER_EN_TO_NO = "entropy25/no_en"
7
-
8
-
9
- tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
10
 
 
11
  base_model = AutoModelForSeq2SeqLM.from_pretrained(
12
- BASE_MODEL,
13
  torch_dtype=torch.float16,
14
  low_cpu_mem_usage=True,
15
  device_map="auto"
16
  )
17
 
18
- model_no_to_en = PeftModel.from_pretrained(base_model, ADAPTER_NO_TO_EN)
19
- model_en_to_no = PeftModel.from_pretrained(base_model, ADAPTER_EN_TO_NO)
20
-
21
- LANG_CODES = {
22
- "English": "eng_Latn",
23
- "Norwegian": "nob_Latn"
24
- }
25
 
26
  def translate(text, source_lang, target_lang):
27
  if not text.strip():
28
- return "Please enter text to translate"
29
 
30
  if source_lang == target_lang:
31
- return "Source and target languages must be different"
32
 
33
- try:
34
- model = model_no_to_en if source_lang == "Norwegian" else model_en_to_no
35
-
36
- inputs = tokenizer(
37
- text,
38
- return_tensors="pt",
39
- truncation=True,
40
- max_length=512
41
- )
42
-
43
- if hasattr(model, 'device'):
44
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
45
-
46
- outputs = model.generate(
47
- **inputs,
48
- forced_bos_token_id=tokenizer.convert_tokens_to_ids(LANG_CODES[target_lang]),
49
- max_length=512,
50
- num_beams=5
51
- )
52
-
53
- result = tokenizer.decode(outputs[0], skip_special_tokens=True)
54
- return result
55
 
56
- except Exception as e:
57
- return f"Translation error: {str(e)}"
 
 
 
 
 
58
 
59
- def swap_languages(source, target, text, translation):
60
- return target, source, translation, text
61
 
62
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
63
  gr.Markdown("# Oil & Gas Professional Translation")
@@ -69,9 +66,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
69
  label="Source Language",
70
  value="English"
71
  )
72
-
73
- swap_btn = gr.Button("⇄", scale=0, size="sm")
74
-
75
  target_lang = gr.Dropdown(
76
  choices=["English", "Norwegian"],
77
  label="Target Language",
@@ -85,61 +80,44 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
85
  placeholder="Enter text to translate",
86
  lines=8
87
  )
88
- input_chars = gr.Textbox(
89
- label="Character Count",
90
- value="0",
91
- interactive=False,
92
- max_lines=1
93
- )
94
 
95
  with gr.Column():
96
  output_text = gr.Textbox(
97
  label="Translation",
98
- lines=8,
99
- interactive=False
100
  )
101
- with gr.Row():
102
- copy_btn = gr.Button("📋 Copy", scale=1)
103
- clear_btn = gr.Button("🗑️ Clear", scale=1)
104
 
105
- translate_btn = gr.Button("Translate", variant="primary", size="lg")
 
 
 
 
106
 
107
  gr.Examples(
108
  examples=[
109
- ["The drilling operation encountered high pressure", "English", "Norwegian"],
110
- ["Reservoaret viser god permeabilitet", "Norwegian", "English"]
 
111
  ],
112
- inputs=[input_text, source_lang, target_lang]
113
- )
114
-
115
- input_text.change(
116
- fn=lambda x: str(len(x)),
117
- inputs=input_text,
118
- outputs=input_chars
119
  )
120
 
121
  translate_btn.click(
122
  fn=translate,
123
  inputs=[input_text, source_lang, target_lang],
124
- outputs=output_text
125
  )
126
 
127
  swap_btn.click(
128
  fn=swap_languages,
129
- inputs=[source_lang, target_lang, input_text, output_text],
130
- outputs=[source_lang, target_lang, input_text, output_text]
131
-
132
- )
133
-
134
- copy_btn.click(
135
- fn=lambda x: x,
136
- inputs=output_text,
137
- outputs=input_text
138
  )
139
 
140
  clear_btn.click(
141
- fn=lambda: ("", ""),
142
- outputs=[input_text, output_text]
143
  )
144
 
145
  demo.launch()
 
1
+ import gradio as gr
2
+ import torch
3
  from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
  from peft import PeftModel
5
 
6
+ base_model_name = "facebook/nllb-200-distilled-600M"
7
+ adapter_model_name = "entropy25/mt_en_no_oil"
 
 
 
 
8
 
9
+ tokenizer = AutoTokenizer.from_pretrained(base_model_name)
10
  base_model = AutoModelForSeq2SeqLM.from_pretrained(
11
+ base_model_name,
12
  torch_dtype=torch.float16,
13
  low_cpu_mem_usage=True,
14
  device_map="auto"
15
  )
16
 
17
+ model = PeftModel.from_pretrained(base_model, adapter_model_name)
 
 
 
 
 
 
18
 
19
  def translate(text, source_lang, target_lang):
20
  if not text.strip():
21
+ return "Please enter text to translate", "0 characters"
22
 
23
  if source_lang == target_lang:
24
+ return "Source and target languages cannot be the same", f"{len(text)} characters"
25
 
26
+ lang_map = {
27
+ "English": "eng_Latn",
28
+ "Norwegian": "nob_Latn"
29
+ }
30
+
31
+ inputs = tokenizer(
32
+ text,
33
+ return_tensors="pt",
34
+ truncation=True,
35
+ max_length=512
36
+ )
37
+
38
+ if hasattr(model, 'device'):
39
+ inputs = {k: v.to(model.device) for k, v in inputs.items()}
40
+
41
+ outputs = model.generate(
42
+ **inputs,
43
+ forced_bos_token_id=tokenizer.convert_tokens_to_ids(lang_map[target_lang]),
44
+ max_length=512,
45
+ num_beams=5
46
+ )
 
47
 
48
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
49
+ char_info = f"{len(text)} {len(result)} characters"
50
+
51
+ return result, char_info
52
+
53
+ def swap_languages(src, tgt):
54
+ return tgt, src
55
 
56
+ def clear_all():
57
+ return "", "", "0 characters"
58
 
59
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
60
  gr.Markdown("# Oil & Gas Professional Translation")
 
66
  label="Source Language",
67
  value="English"
68
  )
69
+ swap_btn = gr.Button("⇄ Swap", size="sm")
 
 
70
  target_lang = gr.Dropdown(
71
  choices=["English", "Norwegian"],
72
  label="Target Language",
 
80
  placeholder="Enter text to translate",
81
  lines=8
82
  )
 
 
 
 
 
 
83
 
84
  with gr.Column():
85
  output_text = gr.Textbox(
86
  label="Translation",
87
+ lines=8
 
88
  )
 
 
 
89
 
90
+ char_count = gr.Textbox(label="Statistics", value="0 characters", interactive=False)
91
+
92
+ with gr.Row():
93
+ translate_btn = gr.Button("Translate", variant="primary", size="lg")
94
+ clear_btn = gr.Button("Clear All", size="lg")
95
 
96
  gr.Examples(
97
  examples=[
98
+ ["The drilling operation encountered high pressure at 3000 meters depth", "English", "Norwegian"],
99
+ ["Reservoaret viser god permeabilitet og porøsitet", "Norwegian", "English"],
100
+ ["Wellhead pressure monitoring indicates stable production", "English", "Norwegian"]
101
  ],
102
+ inputs=[input_text, source_lang, target_lang],
103
+ label="Example Translations"
 
 
 
 
 
104
  )
105
 
106
  translate_btn.click(
107
  fn=translate,
108
  inputs=[input_text, source_lang, target_lang],
109
+ outputs=[output_text, char_count]
110
  )
111
 
112
  swap_btn.click(
113
  fn=swap_languages,
114
+ inputs=[source_lang, target_lang],
115
+ outputs=[source_lang, target_lang]
 
 
 
 
 
 
 
116
  )
117
 
118
  clear_btn.click(
119
+ fn=clear_all,
120
+ outputs=[input_text, output_text, char_count]
121
  )
122
 
123
  demo.launch()