File size: 2,780 Bytes
256c331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# app.py

from transformers import pipeline, MBart50Tokenizer, MBartForConditionalGeneration
from langdetect import detect
import gradio as gr

# English summarization pipeline backed by the facebook/bart-large-cnn checkpoint.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

# Multilingual translation: the model and its tokenizer are both loaded from
# the same mBART-50 many-to-many checkpoint so their vocabularies match.
model_name = "facebook/mbart-large-50-many-to-many-mmt"
translator = MBartForConditionalGeneration.from_pretrained(model_name)
tokenizer = MBart50Tokenizer.from_pretrained(model_name)

# Supported languages: two-letter code returned by language detection ->
# language code understood by the translation tokenizer.
lang_map = dict(
    (
        ("en", "en_XX"),
        ("hi", "hi_IN"),
        ("fr", "fr_XX"),
        ("de", "de_DE"),
        ("es", "es_XX"),
        ("it", "it_IT"),
        ("ta", "ta_IN"),
        ("bn", "bn_IN"),
    )
)

def translate_text(text, src_lang, tgt_lang):
    """Translate ``text`` from one language to another.

    Uses the module-level ``tokenizer`` and ``translator``.

    Args:
        text: The string to translate.
        src_lang: Language code of ``text`` as used by the tokenizer
            (e.g. ``"hi_IN"``).
        tgt_lang: Language code to translate into (e.g. ``"en_XX"``).

    Returns:
        The decoded translation as a string, with special tokens removed.

    Raises:
        KeyError: If ``tgt_lang`` is not a language code known to the tokenizer.
    """
    # The tokenizer must know the source language before it encodes the text.
    tokenizer.src_lang = src_lang
    # truncation=True clips over-long input to the tokenizer's maximum length
    # instead of handing the model more tokens than it can accept.
    encoded = tokenizer(text, return_tensors="pt", truncation=True)
    generated_tokens = translator.generate(
        **encoded,
        # Forcing the first generated token selects the output language.
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
    )
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

def summarize_multilingual(text):
    """Summarize ``text`` and return the summary in the input's language.

    The language is detected first. Non-English input is translated to
    English, summarized by the English summarizer, and the summary is
    translated back. Languages that cannot be detected, or that are missing
    from ``lang_map``, are treated as English.

    Args:
        text: The text to summarize. May be ``None`` or blank.

    Returns:
        A display string holding the detected language code and the summary,
        or a warning message when ``text`` is empty or whitespace only.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text to summarize."

    # Imported locally so only the detection failure is caught below, rather
    # than every exception (a bare ``except`` would also swallow
    # KeyboardInterrupt and SystemExit).
    from langdetect.lang_detect_exception import LangDetectException

    # Detect language; fall back to English when detection fails.
    try:
        lang = detect(text)
    except LangDetectException:
        lang = "en"

    # Unsupported languages are handled as English as well.
    if lang not in lang_map:
        lang = "en"

    src_lang = lang_map[lang]
    tgt_lang = "en_XX"  # the summarizer only works on English
    needs_translation = src_lang != tgt_lang

    # If input is not English -> translate to English first.
    if needs_translation:
        text = translate_text(text, src_lang, tgt_lang)

    # Summarize. truncation=True clips input that exceeds the model's input
    # limit instead of failing on long documents.
    result = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    summary = result[0]["summary_text"]

    # Translate the summary back to the original language.
    if needs_translation:
        summary = translate_text(summary, tgt_lang, src_lang)

    return f"🌐 Detected language: {lang}\n\n🧠 Summary:\n{summary}"

# Gradio Interface: a single text box in, a single text box out.
_input_box = gr.Textbox(lines=12, placeholder="Paste text in English, Hindi, French, etc...")
_output_box = gr.Textbox(label="🌍 Multilingual Summary")

# One clickable sample per row; each row holds the value for the input box.
_sample_texts = [
    ["Artificial Intelligence is transforming industries across the world with automation and intelligent data insights."],
    ["कृत्रिम बुद्धिमत्ता स्वचालन और डेटा अंतर्दृष्टि के माध्यम से उद्योगों को बदल रही है।"],
    ["L'intelligence artificielle transforme les industries grâce à l'automatisation et à l'analyse des données."],
]

demo = gr.Interface(
    title="🌍 Multilingual Text Summarizer using Hugging Face 🤗",
    description="Supports English, Hindi, French, German, Spanish, Tamil, Bengali, and more.",
    fn=summarize_multilingual,
    inputs=_input_box,
    outputs=_output_box,
    examples=_sample_texts,
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()