khurrambasharat committed on
Commit
ba04c9b
·
verified ·
1 Parent(s): 31e4a6e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -34
app.py CHANGED
@@ -1,68 +1,115 @@
1
  import os
2
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
3
 
4
- from transformers import MBart50TokenizerFast, MBartForConditionalGeneration, AutoConfig, pipeline
5
  import gradio as gr
6
 
7
- # ---- Load models ----
8
  model_name = "Mudasir692/mbart-eng-ur"
 
 
9
  config = AutoConfig.from_pretrained(model_name)
10
- if config.early_stopping is None:
11
  config.early_stopping = True
12
 
13
  tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
14
  model = MBartForConditionalGeneration.from_pretrained(model_name, config=config)
15
 
16
- # Grammar correction pipeline
17
- grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
18
-
19
  # ---- Language mapping ----
20
  LANG_CODES = {
21
  "Urdu": "ur_PK",
22
  "Arabic": "ar_AR",
23
  "Hindi": "hi_IN",
24
  "French": "fr_XX",
 
25
  "Spanish": "es_XX",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
 
28
  # ---- Translation function ----
29
- def translate_text(text, target_lang, correct_grammar):
30
  if not text.strip():
31
- return "Please enter some English text."
32
 
33
- # Step 1: Grammar correction (if enabled)
34
- corrected_text = text
35
- if correct_grammar:
36
- result = grammar_corrector(text, max_length=128, num_beams=4)
37
- corrected_text = result[0]['generated_text']
 
 
 
 
 
 
38
 
39
- # Step 2: Translation
40
  tgt_lang_code = LANG_CODES.get(target_lang, "ur_PK")
41
- tokenizer.src_lang = "en_XX"
 
42
  tokenizer.tgt_lang = tgt_lang_code
43
 
44
- inputs = tokenizer(corrected_text, return_tensors="pt", padding=True)
45
- translated_tokens = model.generate(**inputs)
46
- translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
 
 
 
 
 
 
47
 
48
- return f"βœ… Corrected English: {corrected_text}\n\n🌐 Translation ({target_lang}): {translated_text}"
 
 
 
 
 
 
 
49
 
50
  # ---- Gradio Interface ----
51
- app = gr.Interface(
52
- fn=translate_text,
53
- inputs=[
54
- gr.Textbox(label="Enter English Text", placeholder="Type your English sentence here...", lines=2),
55
- gr.Dropdown(["Urdu", "Arabic", "Hindi", "French", "Spanish"], label="Select Target Language", value="Urdu"),
56
- gr.Checkbox(label="Correct Grammar Before Translation", value=True)
57
- ],
58
- outputs=gr.Textbox(label="Output (Corrected + Translated)", lines=4),
59
- title="🌍 Smart Multi-Language Translator + Grammar Corrector",
60
- description="""
61
  <div style='text-align:center;'>
62
- <h3>Auto-correct English grammar before translating into multiple languages.</h3>
63
- <p style='color:gray;'>Powered by Transformers & Gradio β€” built by <b>Khurram Basharat</b>.</p>
 
64
  </div>
65
- """,
66
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
- app.launch()
 
 
1
  import os
2
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
3
 
4
+ from transformers import MBart50TokenizerFast, MBartForConditionalGeneration, AutoConfig
5
  import gradio as gr
6
 
7
# ---- Load model & tokenizer ----
model_name = "Mudasir692/mbart-eng-ur"

# If the checkpoint's config carries no `early_stopping` value (attribute
# missing or set to None), default it to True before the model is built.
config = AutoConfig.from_pretrained(model_name)
early_stopping_unset = getattr(config, "early_stopping", None) is None
if early_stopping_unset:
    config.early_stopping = True

# Tokenizer and model are loaded once at import time and shared by every
# translation request.
tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
model = MBartForConditionalGeneration.from_pretrained(
    model_name,
    config=config,
)
17
 
 
 
 
18
  # ---- Language mapping ----
19
# Display name -> mBART language code, in the order the options were added.
# NOTE(review): "pa_IN" (Punjabi) and "ms_MY" (Malay) do not appear in the
# published mBART-50 language-code list -- confirm the tokenizer knows them.
_LANGUAGE_TABLE = (
    ("Urdu", "ur_PK"),
    ("Arabic", "ar_AR"),
    ("Hindi", "hi_IN"),
    ("French", "fr_XX"),
    ("German", "de_DE"),
    ("Spanish", "es_XX"),
    ("Chinese", "zh_CN"),
    ("Italian", "it_IT"),
    ("Portuguese", "pt_XX"),
    ("Russian", "ru_RU"),
    ("Japanese", "ja_XX"),
    ("Korean", "ko_KR"),
    ("Turkish", "tr_TR"),
    ("Persian", "fa_IR"),
    ("Bengali", "bn_IN"),
    ("Punjabi", "pa_IN"),
    ("Pashto", "ps_AF"),
    ("Malay", "ms_MY"),
    ("Indonesian", "id_ID"),
    ("Tamil", "ta_IN"),
)
LANG_CODES = dict(_LANGUAGE_TABLE)
41
 
42
  # ---- Translation function ----
43
def _detect_source_lang(text):
    """Guess the mBART source-language code of *text* from its script.

    Heuristic only: any character in the Arabic Unicode block
    (U+0600-U+06FF) selects Urdu; otherwise any character in the Devanagari
    block (U+0900-U+097F) selects Hindi; otherwise English is assumed.
    """
    if any("\u0600" <= ch <= "\u06FF" for ch in text):
        return "ur_PK"
    if any("\u0900" <= ch <= "\u097F" for ch in text):
        return "hi_IN"
    return "en_XX"


def translate_text(text, target_lang, auto_detect):
    """Translate *text* into the language selected in the UI.

    Args:
        text: Text to translate. ``None``, empty and whitespace-only input
            short-circuits with a warning message instead of calling the
            model.
        target_lang: Display name of the target language (a key of
            ``LANG_CODES``). Unknown names fall back to Urdu (``ur_PK``).
        auto_detect: When true, the source language is guessed from the
            script of *text*; otherwise English (``en_XX``) is assumed.

    Returns:
        The decoded translation, or a warning string for empty input.
    """
    if not text or not text.strip():
        return "⚠️ Please enter text to translate."

    src_lang = _detect_source_lang(text) if auto_detect else "en_XX"
    tgt_lang_code = LANG_CODES.get(target_lang, "ur_PK")

    tokenizer.src_lang = src_lang
    tokenizer.tgt_lang = tgt_lang_code

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    generate_kwargs = {
        "max_length": 256,
        "num_beams": 5,
        "early_stopping": True,
    }
    # Setting `tokenizer.tgt_lang` alone does not steer generation: the
    # target language has to be forced as the first generated token.
    # Codes the tokenizer does not know resolve to the unknown-token id;
    # in that case generation is left unforced, as it was before.
    tgt_lang_id = tokenizer.convert_tokens_to_ids(tgt_lang_code)
    if tgt_lang_id is not None and tgt_lang_id != tokenizer.unk_token_id:
        generate_kwargs["forced_bos_token_id"] = tgt_lang_id

    translated_tokens = model.generate(**inputs, **generate_kwargs)
    return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
73
 
74
+ # ---- Examples ----
75
# Each example row is [text, target language, auto-detect flag], matching the
# order of the inputs passed to `gr.Examples` below.
examples = [
    ["How are you?", "Urdu", False],
    ["Where are you going?", "Arabic", False],
    ["This is my new project.", "Hindi", False],
    ["I love learning new languages.", "French", False],
    ["Can you help me?", "Spanish", False],
]

# ---- Gradio Interface ----
with gr.Blocks(css="""
body {background: linear-gradient(to bottom right, #f7f9fb, #e0f7fa);}
.gr-button-primary {background-color: #1e3799 !important; color: white !important;}
""") as app:

    # Page header.
    gr.Markdown("""
    <div style='text-align:center;'>
        <h2> Multi-Language Translator (mBART)</h2>
        <p>Translate between English and 20+ languages using a fine-tuned mBART model.</p>
        <p style='color:gray;'>Built by <b>Khurram Basharat</b> — powered by Hugging Face & Gradio.</p>
    </div>
    """)

    with gr.Row():
        # Left column: inputs and the translate trigger.
        with gr.Column(scale=1):
            text_input = gr.Textbox(label="Enter Text", placeholder="Type your sentence here...", lines=4)
            target_lang = gr.Dropdown(sorted(LANG_CODES.keys()), label="Select Target Language", value="Urdu")
            auto_detect = gr.Checkbox(label="Auto-detect Source Language", value=False)
            translate_btn = gr.Button("Translate")

        # Right column: translation output.
        with gr.Column(scale=1):
            result_output = gr.Textbox(label="Translation", lines=4)
            copy_btn = gr.Button("📋 Copy Translation")

    gr.Examples(examples, inputs=[text_input, target_lang, auto_detect])

    # ---- Actions ----
    translate_btn.click(translate_text, inputs=[text_input, target_lang, auto_detect], outputs=result_output)
    # NOTE(review): `copy_btn` has no click handler wired, so the button is
    # currently inert. A client-side clipboard handler was drafted but left
    # disabled -- TODO wire it using the JS hook of the installed Gradio version.

# ---- Launch app ----
app.launch(server_name="0.0.0.0", server_port=7860)