khurrambasharat committed on
Commit
a23ba4b
·
verified ·
1 Parent(s): 8899d47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -23
app.py CHANGED
@@ -1,13 +1,15 @@
1
  import os
2
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
3
 
4
- from transformers import MBart50TokenizerFast, MBartForConditionalGeneration, AutoConfig
 
 
 
5
  import gradio as gr
6
 
7
- # ---- Load model & tokenizer ----
8
  model_name = "Mudasir692/mbart-eng-ur"
9
 
10
- # Fix config issue
11
  config = AutoConfig.from_pretrained(model_name)
12
  if getattr(config, "early_stopping", None) is None:
13
  config.early_stopping = True
@@ -15,6 +17,11 @@ if getattr(config, "early_stopping", None) is None:
15
  tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
16
  model = MBartForConditionalGeneration.from_pretrained(model_name, config=config)
17
 
 
 
 
 
 
18
  # ---- Language mapping ----
19
  LANG_CODES = {
20
  "Urdu": "ur_PK",
@@ -39,45 +46,57 @@ LANG_CODES = {
39
  "Tamil": "ta_IN"
40
  }
41
 
 
 
 
 
 
 
 
 
 
# ---- Translation function ----
def translate_text(text, target_lang, auto_detect):
    """Translate *text* into the selected target language with mBART.

    Args:
        text: Sentence typed by the user.
        target_lang: Display name looked up in ``LANG_CODES``; names that are
            not in the table fall back to ``"ur_PK"``.
        auto_detect: When true, guess the source language from the Unicode
            blocks present in *text*; otherwise the source is English.

    Returns:
        The translated string, or a warning message when *text* is blank.
    """
    # Blank (or missing) input never reaches the model.
    if not text or not text.strip():
        return "⚠️ Please enter text to translate."

    # Source language
    if auto_detect:
        # Very simple heuristic-based detection: any character from the Arabic
        # block means Urdu, any Devanagari character means Hindi.
        if any("\u0600" <= ch <= "\u06FF" for ch in text):
            src_lang = "ur_PK"
        elif any("\u0900" <= ch <= "\u097F" for ch in text):
            src_lang = "hi_IN"
        else:
            src_lang = "en_XX"
    else:
        src_lang = "en_XX"

    tgt_lang_code = LANG_CODES.get(target_lang, "ur_PK")
    tokenizer.src_lang = src_lang
    tokenizer.tgt_lang = tgt_lang_code

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated_tokens = model.generate(
        **inputs,
        # mBART-50 selects the output language from the first generated
        # token; setting ``tokenizer.tgt_lang`` alone does not steer
        # generation, so the target language code is forced here.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang_code),
        max_length=256,
        num_beams=5,
        early_stopping=True,
    )
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
 
73
 
74
  # ---- Examples ----
75
  examples = [
76
- ["How are you?", "Urdu", False],
77
- ["Where are you going?", "Arabic", False],
78
- ["This is my new project.", "Hindi", False],
79
  ["I love learning new languages.", "French", False],
80
- ["Can you help me?", "Spanish", False],
81
  ]
82
 
83
  # ---- Gradio Interface ----
@@ -88,28 +107,28 @@ with gr.Blocks(css="""
88
 
89
  gr.Markdown("""
90
  <div style='text-align:center;'>
91
- <h2>🌍 Multi-Language Translator (mBART)</h2>
92
- <p>Translate between English and 20+ languages using a fine-tuned mBART model.</p>
 
93
  <p style='color:gray;'>Built by <b>Khurram Basharat</b> — powered by Hugging Face & Gradio.</p>
94
  </div>
95
  """)
96
 
97
  with gr.Row():
98
  with gr.Column(scale=1):
99
- text_input = gr.Textbox(label="Enter Text", placeholder="Type your sentence here...", lines=4)
100
  target_lang = gr.Dropdown(sorted(LANG_CODES.keys()), label="Select Target Language", value="Urdu")
101
  auto_detect = gr.Checkbox(label="Auto-detect Source Language", value=False)
102
  translate_btn = gr.Button("🌐 Translate")
103
 
104
  with gr.Column(scale=1):
105
- result_output = gr.Textbox(label="Translation", lines=4)
106
- copy_btn = gr.Button("πŸ“‹ Copy Translation")
107
 
108
  gr.Examples(examples, inputs=[text_input, target_lang, auto_detect])
109
 
110
  # ---- Actions ----
111
- translate_btn.click(translate_text, inputs=[text_input, target_lang, auto_detect], outputs=result_output)
112
- #copy_btn.click(None, inputs=result_output, outputs=None, _js="(text) => navigator.clipboard.writeText(text)")
113
 
114
  # ---- Launch app ----
115
  app.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import os
2
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
3
 
4
+ from transformers import (
5
+ MBart50TokenizerFast, MBartForConditionalGeneration, AutoConfig,
6
+ AutoTokenizer, AutoModelForSeq2SeqLM
7
+ )
8
  import gradio as gr
9
 
10
+ # ---- Load Translation Model ----
11
  model_name = "Mudasir692/mbart-eng-ur"
12
 
 
13
  config = AutoConfig.from_pretrained(model_name)
14
  if getattr(config, "early_stopping", None) is None:
15
  config.early_stopping = True
 
17
  tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
18
  model = MBartForConditionalGeneration.from_pretrained(model_name, config=config)
19
 
# ---- Load Grammar Correction Model ----
# Checkpoint id is kept in its own name so the tokenizer and the model are
# always loaded from the same repository.
grammar_model_name = "vennify/t5-base-grammar-correction"
grammar_tokenizer, grammar_model = (
    AutoTokenizer.from_pretrained(grammar_model_name),
    AutoModelForSeq2SeqLM.from_pretrained(grammar_model_name),
)
24
+
25
  # ---- Language mapping ----
26
  LANG_CODES = {
27
  "Urdu": "ur_PK",
 
46
  "Tamil": "ta_IN"
47
  }
48
 
# ---- Grammar Correction Function ----
def correct_grammar(text):
    """Return *text* after a pass through the grammar-correction model.

    Args:
        text: Sentence to correct.

    Returns:
        The model's corrected sentence. Blank input (``None``, empty, or
        whitespace-only) is returned unchanged without invoking the model.
    """
    if not text or not text.strip():
        return text

    # The vennify T5 checkpoint is documented with the "grammar: " task
    # prefix; a different prefix leaves the model without its trained cue.
    inputs = grammar_tokenizer.encode(
        f"grammar: {text}",
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    outputs = grammar_model.generate(
        inputs,
        max_length=512,
        num_beams=4,
        early_stopping=True,
    )
    return grammar_tokenizer.decode(outputs[0], skip_special_tokens=True)
57
+
# ---- Translation function ----
def _detect_source_lang(text):
    """Guess the mBART source-language code from the scripts used in *text*."""
    # Any character from the Arabic block is treated as Urdu.
    if any("\u0600" <= ch <= "\u06FF" for ch in text):
        return "ur_PK"
    # Any Devanagari character is treated as Hindi.
    if any("\u0900" <= ch <= "\u097F" for ch in text):
        return "hi_IN"
    return "en_XX"


def translate_text(text, target_lang, auto_detect):
    """Grammar-correct English input, then translate it with mBART.

    Args:
        text: Sentence typed by the user.
        target_lang: Display name looked up in ``LANG_CODES``; names that are
            not in the table fall back to ``"ur_PK"``.
        auto_detect: When true, guess the source language from the Unicode
            blocks present in *text*; otherwise the source is English.

    Returns:
        A ``(corrected_text, translated_output)`` tuple, one value per output
        textbox. For blank input the first element is a warning message and
        the second is an empty string.
    """
    # Blank (or missing) input never reaches either model.
    if not text or not text.strip():
        return "⚠️ Please enter text to translate.", ""

    # Step 1: Language detection, on the user's own text rather than on the
    # grammar model's output, so the correction step cannot skew the result.
    src_lang = _detect_source_lang(text) if auto_detect else "en_XX"

    # Step 2: Grammar correction. The grammar model is English-only, so text
    # detected as another language is passed through untouched.
    corrected_text = correct_grammar(text) if src_lang == "en_XX" else text

    # Step 3: Translation
    tgt_lang_code = LANG_CODES.get(target_lang, "ur_PK")
    tokenizer.src_lang = src_lang
    tokenizer.tgt_lang = tgt_lang_code

    inputs = tokenizer(corrected_text, return_tensors="pt", padding=True, truncation=True)
    translated_tokens = model.generate(
        **inputs,
        # mBART-50 selects the output language from the first generated
        # token; setting ``tokenizer.tgt_lang`` alone does not steer
        # generation, so the target language code is forced here.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang_code),
        max_length=256,
        num_beams=5,
        early_stopping=True,
    )
    translated_output = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)

    return corrected_text, translated_output
92
 
93
  # ---- Examples ----
94
  examples = [
95
+ ["I goes to school every day.", "Urdu", False],
96
+ ["He dont like this movie.", "Hindi", False],
97
+ ["This is my new project.", "Arabic", False],
98
  ["I love learning new languages.", "French", False],
99
+ ["Can you helps me?", "Spanish", False],
100
  ]
101
 
102
  # ---- Gradio Interface ----
 
107
 
108
  gr.Markdown("""
109
  <div style='text-align:center;'>
110
+ <h2>🌍 Smart Multi-Language Translator</h2>
111
+ <h4>✨ Includes Grammar Correction Before Translation ✨</h4>
112
+ <p>Translate between English and 20+ languages using a fine-tuned mBART model with auto grammar correction.</p>
113
  <p style='color:gray;'>Built by <b>Khurram Basharat</b> — powered by Hugging Face & Gradio.</p>
114
  </div>
115
  """)
116
 
117
  with gr.Row():
118
  with gr.Column(scale=1):
119
+ text_input = gr.Textbox(label="Enter Text", placeholder="Type your English sentence...", lines=4)
120
  target_lang = gr.Dropdown(sorted(LANG_CODES.keys()), label="Select Target Language", value="Urdu")
121
  auto_detect = gr.Checkbox(label="Auto-detect Source Language", value=False)
122
  translate_btn = gr.Button("🌐 Translate")
123
 
124
  with gr.Column(scale=1):
125
+ corrected_output = gr.Textbox(label="Corrected English Sentence", lines=3)
126
+ translated_output = gr.Textbox(label="Translated Sentence", lines=3)
127
 
128
  gr.Examples(examples, inputs=[text_input, target_lang, auto_detect])
129
 
130
  # ---- Actions ----
131
+ translate_btn.click(translate_text, inputs=[text_input, target_lang, auto_detect], outputs=[corrected_output, translated_output])
 
132
 
133
  # ---- Launch app ----
134
  app.launch(server_name="0.0.0.0", server_port=7860)