entropy25 committed on
Commit
97c7079
·
verified ·
1 Parent(s): 4c0b678

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -115
app.py CHANGED
@@ -1,131 +1,59 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
 
4
 
5
- # Load Facebook's mBART-50 model
6
- model_name = "facebook/mbart-large-50-many-to-many-mmt"
7
- tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
8
- model = MBartForConditionalGeneration.from_pretrained(model_name)
 
 
 
 
 
 
 
 
9
 
10
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
  model.to(device)
12
 
13
  def translate(text, source_lang, target_lang):
14
  if not text.strip():
15
- return "Please enter text to translate"
16
 
17
- # Language codes for mBART-50
18
  lang_map = {
19
- "Norwegian": "nb_NO",
20
- "English": "en_XX"
21
  }
22
 
23
- try:
24
- # Set source language
25
- tokenizer.src_lang = lang_map[source_lang]
26
-
27
- # Tokenize
28
- encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
29
- encoded = encoded.to(device)
30
-
31
- # Generate translation
32
- generated = model.generate(
33
- **encoded,
34
- forced_bos_token_id=tokenizer.lang_code_to_id[lang_map[target_lang]],
35
- max_length=512,
36
- num_beams=5,
37
- early_stopping=True
38
- )
39
-
40
- # Decode result
41
- result = tokenizer.batch_decode(generated, skip_special_tokens=True)[0]
42
- return result
43
-
44
- except Exception as e:
45
- return f"Translation error: {str(e)}"
46
-
47
- def swap_languages(source, target, text):
48
- return target, source, ""
49
-
50
- # Create Gradio interface
51
- with gr.Blocks(
52
- title="🇳🇴 ↔️ 🇬🇧 Norwegian-English Translator",
53
- theme=gr.themes.Soft()
54
- ) as demo:
55
-
56
- gr.Markdown("""
57
- # 🌍 Advanced Norwegian-English Translator
58
- ### Powered by Facebook's mBART-50 Model
59
- """)
60
-
61
- with gr.Row():
62
- source_lang = gr.Dropdown(
63
- choices=["Norwegian", "English"],
64
- value="Norwegian",
65
- label="Source Language"
66
- )
67
-
68
- swap_btn = gr.Button("🔄", size="sm")
69
-
70
- target_lang = gr.Dropdown(
71
- choices=["Norwegian", "English"],
72
- value="English",
73
- label="Target Language"
74
- )
75
-
76
- with gr.Row():
77
- with gr.Column():
78
- input_text = gr.Textbox(
79
- lines=8,
80
- placeholder="Enter text to translate...",
81
- label="Input Text",
82
- max_lines=15
83
- )
84
-
85
- with gr.Column():
86
- output_text = gr.Textbox(
87
- lines=8,
88
- label="Translation",
89
- interactive=False,
90
- max_lines=15
91
- )
92
-
93
- translate_btn = gr.Button("🚀 Translate", variant="primary", size="lg")
94
-
95
- # Event handlers
96
- translate_btn.click(
97
- fn=translate,
98
- inputs=[input_text, source_lang, target_lang],
99
- outputs=output_text
100
- )
101
-
102
- swap_btn.click(
103
- fn=swap_languages,
104
- inputs=[source_lang, target_lang, input_text],
105
- outputs=[source_lang, target_lang, input_text]
106
  )
107
 
108
- input_text.submit(
109
- fn=translate,
110
- inputs=[input_text, source_lang, target_lang],
111
- outputs=output_text
112
- )
113
-
114
- gr.Markdown("""
115
- ### 📝 Examples
116
- **Norwegian to English:**
117
- - "Hei, hvordan har du det?" → "Hello, how are you?"
118
- - "Jeg kommer fra Norge" → "I come from Norway"
119
-
120
- **English to Norwegian:**
121
- - "Thank you very much" → "Tusen takk"
122
- - "Good morning" → "God morgen"
123
- """)
124
-
125
- gr.Markdown("""
126
- ---
127
- *Built with ❤️ using Facebook's mBART-50 and Gradio*
128
- """)
129
 
130
- if __name__ == "__main__":
131
- demo.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
+ from peft import PeftModel
5
 
# Model identifiers passed to ``from_pretrained``: the base checkpoint and
# the LoRA adapter that is applied on top of it.
base_model_name = "facebook/nllb-200-distilled-600M"
adapter_model_name = "entropy25/mt_en_no_oil"

# Use the GPU when torch reports one as available, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The tokenizer comes from the base checkpoint; the adapter is applied to
# the base model afterwards.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForSeq2SeqLM.from_pretrained(base_model_name)
model = PeftModel.from_pretrained(base_model, adapter_model_name)

# Move the adapted model to the selected device.
model.to(device)
21
 
def translate(text, source_lang, target_lang):
    """Translate ``text`` between English and Norwegian.

    Args:
        text: The text to translate. ``None``, empty, or whitespace-only
            input is answered with a prompt message instead of a translation.
        source_lang: Display name of the language ``text`` is written in;
            must be a key of ``lang_map`` ("English" or "Norwegian").
        target_lang: Display name of the language to translate into; accepts
            the same values as ``source_lang``.

    Returns:
        The decoded translation, or a human-readable message when the input
        is empty or a language name is not recognised.
    """
    if text is None or not text.strip():
        return "Please enter text to translate."

    # Display names shown in the UI -> language codes used by the tokenizer.
    lang_map = {
        "English": "eng_Latn",
        "Norwegian": "nob_Latn",
    }

    # Validate both names up front so a bad value yields a readable message
    # rather than a KeyError part-way through generation.
    for lang in (source_lang, target_lang):
        if lang not in lang_map:
            return f"Unsupported language: {lang}"

    # Tell the tokenizer which language the input is written in. Without
    # this the tokenizer keeps its load-time source language regardless of
    # what the user selected.
    # NOTE(review): this mutates the shared module-level tokenizer; confirm
    # requests are not served concurrently.
    tokenizer.src_lang = lang_map[source_lang]

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    ).to(device)

    # The target language is selected by forcing its language-code token as
    # the first generated token.
    outputs = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(lang_map[target_lang]),
        max_length=512,
        num_beams=5,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
# Simple Gradio UI: one text input plus source/target language dropdowns,
# wired directly to ``translate`` (its parameters match the three inputs in
# order, so no wrapper function is needed).
# NOTE(review): the interface is built and launched as a module-level side
# effect, and ``share=True`` requests a public share link; confirm both are
# wanted where this app is hosted.
gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Input text", lines=6),
        gr.Dropdown(choices=["English", "Norwegian"], label="Source language", value="English"),
        gr.Dropdown(choices=["English", "Norwegian"], label="Target language", value="Norwegian"),
    ],
    outputs=gr.Textbox(label="Translation", lines=6),
    title="LoRA-Enhanced English↔Norwegian Translator",
    description="Fine-tuned NLLB-200 model with LoRA adapter: entropy25/mt_en_no_oil",
).launch(share=True)