khurrambasharat committed on
Commit
34870b9
·
verified ·
1 Parent(s): 4a9c2ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -67
app.py CHANGED
@@ -1,75 +1,39 @@
1
- import os
2
- os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
3
-
4
- from transformers import MBart50TokenizerFast, MBartForConditionalGeneration, AutoConfig
5
  import gradio as gr
6
 
7
- # ---- Load model & tokenizer ----
8
- model_name = "Mudasir692/mbart-eng-ur"
9
-
10
- # Fix config issue
11
- config = AutoConfig.from_pretrained(model_name)
12
- if config.early_stopping is None:
13
- config.early_stopping = True
14
-
15
- tokenizer = MBart50TokenizerFast.from_pretrained(model_name)
16
- model = MBartForConditionalGeneration.from_pretrained(model_name, config=config)
17
-
18
- # ---- Language mapping ----
19
- LANG_CODES = {
20
- "Urdu": "ur_PK",
21
- "Arabic": "ar_AR",
22
- "Hindi": "hi_IN",
23
- }
24
 
25
- # ---- Translation function ----
26
- def translate_text(text, target_lang):
27
  if not text.strip():
28
- return "Please enter some English text."
29
-
30
- tgt_lang_code = LANG_CODES.get(target_lang, "ur_PK")
31
- tokenizer.src_lang = "en_XX"
32
- tokenizer.tgt_lang = tgt_lang_code
33
-
34
- inputs = tokenizer(text, return_tensors="pt", padding=True)
35
- translated_tokens = model.generate(**inputs)
36
- output = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
37
- return output
38
-
39
- # ---- Examples ----
40
- examples = [
41
- ["How are you?", "Urdu"],
42
- ["Where are you going?", "Arabic"],
43
- ["This is my new project.", "Hindi"],
44
- ]
45
-
46
- # ---- Gradio Interface ----
47
  app = gr.Interface(
48
  fn=translate_text,
49
- inputs=[
50
- gr.Textbox(label="Enter English Text", placeholder="Type your English sentence here...", lines=2),
51
- gr.Dropdown(["Urdu", "Arabic", "Hindi"], label="Select Target Language", value="Urdu")
52
- ],
53
- outputs=gr.Textbox(label="Translation", lines=2),
54
- examples=examples,
55
- title="🌍 Multi-Language Translator",
56
- description="""
57
- <div style='text-align:center;'>
58
- <h3>Translate English sentences into Urdu, Arabic, or Hindi using a fine-tuned mBART model.</h3>
59
- <p style='color:gray;'>Built by <b>Khurram Basharat</b> — powered by Transformers & Gradio.</p>
60
- <p><i>⏳ The model loads on first use, please wait a few seconds.</i></p>
61
- </div>
62
- """,
63
- css="""
64
- body {
65
- background: linear-gradient(to bottom right, #f1f2f6, #dff9fb);
66
- font-family: 'Segoe UI', sans-serif;
67
- }
68
- .gr-button-primary {
69
- background-color: #1e3799 !important;
70
- color: white !important;
71
- }
72
- """,
73
  )
74
 
75
- app.launch()
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 
 
2
  import gradio as gr
3
 
4
+ # Load model and tokenizer
5
+ model_name = "Helsinki-NLP/opus-mt-en-ur" # English → Urdu model (translates in this direction only)
6
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
7
+ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
+ def translate_text(text):
 
10
  if not text.strip():
11
+ return "Please enter some text to translate."
12
+
13
+ # Tokenize input
14
+ inputs = tokenizer(text, return_tensors="pt", truncation=True)
15
+
16
+ # Generate translation (explicit max_length fix)
17
+ translated_tokens = model.generate(
18
+ **inputs,
19
+ max_length=256, # Explicit cap on output length; avoids the error raised when max_length is None
20
+ num_beams=5, # Enables beam search (more accurate)
21
+ early_stopping=True
22
+ )
23
+
24
+ # Decode output
25
+ translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
26
+ return translated_text
27
+
28
+ # Build Gradio Interface
 
29
  app = gr.Interface(
30
  fn=translate_text,
31
+ inputs=gr.Textbox(lines=3, placeholder="Enter English or Urdu text..."),
32
+ outputs="text",
33
+ title="English Urdu Translator",
34
+ description="Translate between English and Urdu using a Hugging Face translation model.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  )
36
 
37
+ # Launch app
38
+ if __name__ == "__main__":
39
+ app.launch(server_name="0.0.0.0", server_port=7860)