Spaces:
Runtime error
Runtime error
File size: 2,780 Bytes
256c331 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
# app.py
from transformers import pipeline, MBart50Tokenizer, MBartForConditionalGeneration
from langdetect import detect
import gradio as gr
# English summarization model, loaded once at import time
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

# Multilingual MBart checkpoint shared by the translation model and its tokenizer
model_name = "facebook/mbart-large-50-many-to-many-mmt"
translator = MBartForConditionalGeneration.from_pretrained(model_name)
tokenizer = MBart50Tokenizer.from_pretrained(model_name)
# Supported languages: detected language code -> language code handed to
# the translation tokenizer
lang_map = dict(
    en="en_XX",
    hi="hi_IN",
    fr="fr_XX",
    de="de_DE",
    es="es_XX",
    it="it_IT",
    ta="ta_IN",
    bn="bn_IN",
)
def translate_text(text, src_lang, tgt_lang):
    """Translate ``text`` from ``src_lang`` to ``tgt_lang``.

    Uses the module-level ``tokenizer`` and ``translator``.

    Args:
        text: The string to translate.
        src_lang: Language code of the input, as stored in the values of
            ``lang_map`` (e.g. ``"hi_IN"``).
        tgt_lang: Language code of the desired output (e.g. ``"en_XX"``).

    Returns:
        The decoded text of the first generated sequence, with special
        tokens removed.

    Raises:
        KeyError: If ``tgt_lang`` is not in ``tokenizer.lang_code_to_id``.
    """
    # Tell the tokenizer which language the input is written in.
    tokenizer.src_lang = src_lang
    # Truncate so that over-long input is cut to the tokenizer's maximum
    # length instead of being passed to the model at full size.
    encoded = tokenizer(text, return_tensors="pt", truncation=True)
    generated_tokens = translator.generate(
        **encoded,
        # Force the first generated token to be the target-language code.
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang],
    )
    return tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]
def summarize_multilingual(text):
    """Summarize ``text`` and return the summary in the input's language.

    Non-English input is translated to English, summarized, and the summary
    is translated back. When the language cannot be detected, or is not a
    key of ``lang_map``, the text is treated as English.

    Args:
        text: The text to summarize; may be ``None``, empty, or blank.

    Returns:
        A display string containing the detected language code and the
        summary, or a prompt asking for input when ``text`` is missing or
        whitespace-only.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text to summarize."

    # Detect language. Detection is best-effort, so any failure falls back to
    # English rather than failing the request. ``except Exception`` (instead
    # of a bare ``except``) lets KeyboardInterrupt/SystemExit propagate.
    try:
        lang = detect(text)
    except Exception:
        lang = "en"
    if lang not in lang_map:
        lang = "en"

    src_lang = lang_map[lang]
    tgt_lang = "en_XX"  # summarize in English first
    needs_translation = src_lang != tgt_lang

    # If input not English → translate to English
    if needs_translation:
        text = translate_text(text, src_lang, tgt_lang)

    # Summarize; truncation=True asks the pipeline to cut over-long input
    # down to the model's maximum length instead of passing it through whole.
    result = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    summary = result[0]["summary_text"]

    # Translate summary back to original language
    if needs_translation:
        summary = translate_text(summary, tgt_lang, src_lang)

    return f"🌐 Detected language: {lang}\n\n🧠 Summary:\n{summary}"
# Gradio UI: one multi-line input box, one output box, and sample inputs
_input_box = gr.Textbox(
    lines=12,
    placeholder="Paste text in English, Hindi, French, etc...",
)
_output_box = gr.Textbox(label="🌍 Multilingual Summary")

# Each inner list holds the value for the single input component.
_sample_inputs = [
    ["Artificial Intelligence is transforming industries across the world with automation and intelligent data insights."],
    ["कृत्रिम बुद्धिमत्ता स्वचालन और डेटा अंतर्दृष्टि के माध्यम से उद्योगों को बदल रही है।"],
    ["L'intelligence artificielle transforme les industries grâce à l'automatisation et à l'analyse des données."],
]

demo = gr.Interface(
    fn=summarize_multilingual,
    inputs=_input_box,
    outputs=_output_box,
    title="🌍 Multilingual Text Summarizer using Hugging Face 🤗",
    description="Supports English, Hindi, French, German, Spanish, Tamil, Bengali, and more.",
    examples=_sample_inputs,
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|