nit454 committed on
Commit
d3a446c
·
verified ·
1 Parent(s): c356095

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -0
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ from transformers import pipeline, MBart50Tokenizer, MBartForConditionalGeneration
4
+ from langdetect import detect
5
+ import gradio as gr
6
+
7
# English summarization pipeline backed by the facebook/bart-large-cnn checkpoint.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Multilingual mBART-50 checkpoint; the tokenizer and the generation model
# are both loaded from the same name.
model_name = "facebook/mbart-large-50-many-to-many-mmt"
tokenizer = MBart50Tokenizer.from_pretrained(model_name)
translator = MBartForConditionalGeneration.from_pretrained(model_name)

# Short language code -> language code understood by the mBART-50 tokenizer.
lang_map = dict(
    en="en_XX",
    hi="hi_IN",
    fr="fr_XX",
    de="de_DE",
    es="es_XX",
    it="it_IT",
    ta="ta_IN",
    bn="bn_IN",
)
26
+
27
def translate_text(text, src_lang, tgt_lang):
    """Translate *text* from ``src_lang`` to ``tgt_lang`` with the mBART model.

    Args:
        text: The text to translate.
        src_lang: Tokenizer language code of the input (e.g. ``"hi_IN"``).
        tgt_lang: Tokenizer language code of the output (e.g. ``"en_XX"``).

    Returns:
        The first decoded translation, with special tokens removed.

    Raises:
        KeyError: If ``tgt_lang`` is not present in
            ``tokenizer.lang_code_to_id``.
    """
    # The tokenizer is module-level state; the source language has to be set
    # on it before encoding.
    tokenizer.src_lang = src_lang
    # Truncate to the tokenizer's maximum length: without this, over-long
    # input is passed to the model as-is and generation fails. Text beyond
    # the limit is dropped from the translation.
    encoded = tokenizer(text, return_tensors="pt", truncation=True)
    generated_tokens = translator.generate(
        **encoded,
        # Force the first generated token to be the target-language code.
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
    )
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
32
+
33
def summarize_multilingual(text):
    """Summarize *text* and return the summary in the text's own language.

    The input language is detected; non-English input is translated to
    English, summarized, and the summary is translated back. Languages that
    are not keys of ``lang_map`` (or inputs for which detection fails) are
    treated as English.

    Args:
        text: The text to summarize. ``None``, empty, or whitespace-only
            input is rejected with a user-facing message.

    Returns:
        A display string containing the detected language code and the
        summary, or a prompt asking for input when *text* is blank.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text to summarize."

    # Detect language. Detection failure falls back to English, but only for
    # ordinary errors: KeyboardInterrupt / SystemExit must still propagate.
    try:
        lang = detect(text)
    except Exception:
        lang = "en"

    if lang not in lang_map:
        lang = "en"

    src_lang = lang_map[lang]
    needs_translation = src_lang != "en_XX"

    # The summarizer is English-only, so translate other languages first.
    if needs_translation:
        text = translate_text(text, src_lang, "en_XX")

    # truncation=True keeps over-long input from exceeding the summarization
    # model's maximum input length, which would otherwise raise.
    summary = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]

    # Translate the summary back to the original language.
    if needs_translation:
        summary = translate_text(summary, "en_XX", src_lang)

    return f"🌐 Detected language: {lang}\n\n🧠 Summary:\n{summary}"
61
+
62
# Gradio UI: a multi-line input box wired to summarize_multilingual and a
# single text box for the result.
_input_box = gr.Textbox(lines=12, placeholder="Paste text in English, Hindi, French, etc...")
_output_box = gr.Textbox(label="🌍 Multilingual Summary")

# One clickable example per row; each row holds the single input value.
_sample_inputs = [
    ["Artificial Intelligence is transforming industries across the world with automation and intelligent data insights."],
    ["कृत्रिम बुद्धिमत्ता स्वचालन और डेटा अंतर्दृष्टि के माध्यम से उद्योगों को बदल रही है।"],
    ["L'intelligence artificielle transforme les industries grâce à l'automatisation et à l'analyse des données."],
]

demo = gr.Interface(
    fn=summarize_multilingual,
    inputs=_input_box,
    outputs=_output_box,
    title="🌍 Multilingual Text Summarizer using Hugging Face 🤗",
    description="Supports English, Hindi, French, German, Spanish, Tamil, Bengali, and more.",
    examples=_sample_inputs,
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()