khurrambasharat committed on
Commit
31e4a6e
·
verified ·
1 Parent(s): a23ba4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -99
app.py CHANGED
@@ -1,26 +1,20 @@
1
  import os
2
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
3
 
4
- from transformers import (
5
- MBart50TokenizerFast, MBartForConditionalGeneration, AutoConfig,
6
- AutoTokenizer, AutoModelForSeq2SeqLM
7
- )
8
  import gradio as gr
9
 
10
- # ---- Load Translation Model ----
11
  model_name = "Mudasir692/mbart-eng-ur"
12
-
13
  config = AutoConfig.from_pretrained(model_name)
14
- if getattr(config, "early_stopping", None) is None:
15
  config.early_stopping = True
16
 
17
  tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
18
  model = MBartForConditionalGeneration.from_pretrained(model_name, config=config)
19
 
20
- # ---- Load Grammar Correction Model ----
21
- grammar_model_name = "vennify/t5-base-grammar-correction"
22
- grammar_tokenizer = AutoTokenizer.from_pretrained(grammar_model_name)
23
- grammar_model = AutoModelForSeq2SeqLM.from_pretrained(grammar_model_name)
24
 
25
  # ---- Language mapping ----
26
  LANG_CODES = {
@@ -28,107 +22,47 @@ LANG_CODES = {
28
  "Arabic": "ar_AR",
29
  "Hindi": "hi_IN",
30
  "French": "fr_XX",
31
- "German": "de_DE",
32
  "Spanish": "es_XX",
33
- "Chinese": "zh_CN",
34
- "Italian": "it_IT",
35
- "Portuguese": "pt_XX",
36
- "Russian": "ru_RU",
37
- "Japanese": "ja_XX",
38
- "Korean": "ko_KR",
39
- "Turkish": "tr_TR",
40
- "Persian": "fa_IR",
41
- "Bengali": "bn_IN",
42
- "Punjabi": "pa_IN",
43
- "Pashto": "ps_AF",
44
- "Malay": "ms_MY",
45
- "Indonesian": "id_ID",
46
- "Tamil": "ta_IN"
47
  }
48
 
49
- # ---- Grammar Correction Function ----
50
- def correct_grammar(text):
51
- if not text.strip():
52
- return text
53
- inputs = grammar_tokenizer.encode(f"fix: {text}", return_tensors="pt", max_length=512, truncation=True)
54
- outputs = grammar_model.generate(inputs, max_length=512, num_beams=4, early_stopping=True)
55
- corrected_text = grammar_tokenizer.decode(outputs[0], skip_special_tokens=True)
56
- return corrected_text
57
-
58
  # ---- Translation function ----
59
- def translate_text(text, target_lang, auto_detect):
60
  if not text.strip():
61
- return "⚠️ Please enter text to translate.", ""
62
-
63
- # Step 1: Grammar correction
64
- corrected_text = correct_grammar(text)
65
 
66
- # Step 2: Language detection
67
- if auto_detect:
68
- if any("\u0600" <= ch <= "\u06FF" for ch in corrected_text):
69
- src_lang = "ur_PK"
70
- elif any("\u0900" <= ch <= "\u097F" for ch in corrected_text):
71
- src_lang = "hi_IN"
72
- else:
73
- src_lang = "en_XX"
74
- else:
75
- src_lang = "en_XX"
76
 
77
- # Step 3: Translation
78
  tgt_lang_code = LANG_CODES.get(target_lang, "ur_PK")
79
- tokenizer.src_lang = src_lang
80
  tokenizer.tgt_lang = tgt_lang_code
81
 
82
- inputs = tokenizer(corrected_text, return_tensors="pt", padding=True, truncation=True)
83
- translated_tokens = model.generate(
84
- **inputs,
85
- max_length=256,
86
- num_beams=5,
87
- early_stopping=True
88
- )
89
- translated_output = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
90
-
91
- return corrected_text, translated_output
92
 
93
- # ---- Examples ----
94
- examples = [
95
- ["I goes to school every day.", "Urdu", False],
96
- ["He dont like this movie.", "Hindi", False],
97
- ["This is my new project.", "Arabic", False],
98
- ["I love learning new languages.", "French", False],
99
- ["Can you helps me?", "Spanish", False],
100
- ]
101
 
102
  # ---- Gradio Interface ----
103
- with gr.Blocks(css="""
104
- body {background: linear-gradient(to bottom right, #f7f9fb, #e0f7fa);}
105
- .gr-button-primary {background-color: #1e3799 !important; color: white !important;}
106
- """) as app:
107
-
108
- gr.Markdown("""
 
 
 
 
109
  <div style='text-align:center;'>
110
- <h2>🌍 Smart Multi-Language Translator</h2>
111
- <h4>✨ Includes Grammar Correction Before Translation ✨</h4>
112
- <p>Translate between English and 20+ languages using a fine-tuned mBART model with auto grammar correction.</p>
113
- <p style='color:gray;'>Built by <b>Khurram Basharat</b> β€” powered by Hugging Face & Gradio.</p>
114
  </div>
115
- """)
116
-
117
- with gr.Row():
118
- with gr.Column(scale=1):
119
- text_input = gr.Textbox(label="Enter Text", placeholder="Type your English sentence...", lines=4)
120
- target_lang = gr.Dropdown(sorted(LANG_CODES.keys()), label="Select Target Language", value="Urdu")
121
- auto_detect = gr.Checkbox(label="Auto-detect Source Language", value=False)
122
- translate_btn = gr.Button("🌐 Translate")
123
-
124
- with gr.Column(scale=1):
125
- corrected_output = gr.Textbox(label="Corrected English Sentence", lines=3)
126
- translated_output = gr.Textbox(label="Translated Sentence", lines=3)
127
-
128
- gr.Examples(examples, inputs=[text_input, target_lang, auto_detect])
129
-
130
- # ---- Actions ----
131
- translate_btn.click(translate_text, inputs=[text_input, target_lang, auto_detect], outputs=[corrected_output, translated_output])
132
 
133
- # ---- Launch app ----
134
- app.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import os
2
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
3
 
4
+ from transformers import MBart50TokenizerFast, MBartForConditionalGeneration, AutoConfig, pipeline
 
 
 
5
  import gradio as gr
6
 
7
# ---- Load models ----
model_name = "Mudasir692/mbart-eng-ur"

config = AutoConfig.from_pretrained(model_name)
# Read the attribute defensively: if the loaded config object does not carry
# `early_stopping` at all, a plain attribute access would raise AttributeError
# at startup instead of falling through to the default below.
if getattr(config, "early_stopping", None) is None:
    config.early_stopping = True

tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(model_name, config=config)

# Grammar correction pipeline (used by translate_text when the checkbox is on).
grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
 
 
18
 
19
  # ---- Language mapping ----
20
  LANG_CODES = {
 
22
  "Arabic": "ar_AR",
23
  "Hindi": "hi_IN",
24
  "French": "fr_XX",
 
25
  "Spanish": "es_XX",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
 
 
 
 
 
 
 
 
 
 
28
  # ---- Translation function ----
29
def translate_text(text, target_lang, correct_grammar):
    """Optionally grammar-correct English ``text``, then translate it.

    Args:
        text: English input from the textbox. ``None``, empty, or
            whitespace-only input short-circuits with a prompt message.
        target_lang: Display name of the target language, looked up in
            ``LANG_CODES``; unknown names fall back to ``"ur_PK"``.
        correct_grammar: When truthy, run ``grammar_corrector`` on the input
            before translating.

    Returns:
        One string holding the (possibly corrected) English sentence and the
        translation, or the prompt message when there is nothing to translate.
    """
    # Guard against None as well as blank strings so the handler never raises
    # AttributeError on `.strip()`.
    if not text or not text.strip():
        return "Please enter some English text."

    # Step 1: Grammar correction (if enabled)
    corrected_text = text
    if correct_grammar:
        # NOTE(review): the grammar model's card appears to expect a
        # "grammar: " task prefix on the input -- confirm before changing.
        result = grammar_corrector(text, max_length=128, num_beams=4)
        corrected_text = result[0]['generated_text']

    # Step 2: Translation
    tgt_lang_code = LANG_CODES.get(target_lang, "ur_PK")
    tokenizer.src_lang = "en_XX"
    tokenizer.tgt_lang = tgt_lang_code

    # truncation=True keeps over-long input within the model's maximum length
    # instead of failing inside generate().
    inputs = tokenizer(corrected_text, return_tensors="pt", padding=True, truncation=True)
    # mBART-50 selects the output language from the first generated token, so
    # the target language code has to be forced explicitly; setting
    # tokenizer.tgt_lang alone does not influence generate().
    translated_tokens = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang_code),
    )
    translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)

    return f"✅ Corrected English: {corrected_text}\n\n🌐 Translation ({target_lang}): {translated_text}"
 
 
 
 
 
 
 
49
 
50
  # ---- Gradio Interface ----
51
# One-function UI: text, target language and grammar toggle go in; a single
# combined text report comes out.
app = gr.Interface(
    fn=translate_text,
    inputs=[
        gr.Textbox(
            label="Enter English Text",
            placeholder="Type your English sentence here...",
            lines=2,
        ),
        # Choices must match keys of LANG_CODES: translate_text silently falls
        # back to "ur_PK" for any name it cannot find there.
        gr.Dropdown(
            ["Urdu", "Arabic", "Hindi", "French", "Spanish"],
            label="Select Target Language",
            value="Urdu",
        ),
        gr.Checkbox(label="Correct Grammar Before Translation", value=True),
    ],
    outputs=gr.Textbox(label="Output (Corrected + Translated)", lines=4),
    title="🌍 Smart Multi-Language Translator + Grammar Corrector",
    description="""
<div style='text-align:center;'>
<h3>Auto-correct English grammar before translating into multiple languages.</h3>
<p style='color:gray;'>Powered by Transformers & Gradio — built by <b>Khurram Basharat</b>.</p>
</div>
""",
)

app.launch()