Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import re | |
| from transformers import MBartForConditionalGeneration, MBart50TokenizerFast, pipeline | |
# Checkpoint shared by the translation model and its tokenizer.
_MBART_CHECKPOINT = "facebook/mbart-large-50-one-to-many-mmt"

# Translation model; summarize() uses it to render the English summary in
# the target language.
model = MBartForConditionalGeneration.from_pretrained(_MBART_CHECKPOINT)

# Tokenizer for the translation model, with the source language fixed to
# "en_XX" because the summarizer output fed into it is English.
tokenizer = MBart50TokenizerFast.from_pretrained(
    _MBART_CHECKPOINT,
    src_lang="en_XX",
)

# Summarization pipeline applied to the cleaned input text.
pipe2 = pipeline(
    'summarization',
    model="Tiju1996/t5-small-finetuned-xsum",
)
# Ordered (pattern, replacement) steps applied by process_text().
# Order matters: Markdown images and links must be rewritten BEFORE the
# generic bracket rule, otherwise "[text](url)" loses its text and the later
# URL rule leaves a stray "(" behind.
_CLEANUP_STEPS = (
    # Markdown images "![alt](target)": dropped entirely.
    (re.compile(r'!\[[^\]]*\]\([^)]*\)'), ''),
    # Markdown hyperlinks "[text](target)": keep only the link text.
    (re.compile(r'\[([^\]]+)\]\(([^)]+)\)'), r'\1'),
    # Numeric reference citations / footnote markers such as "[12]" or "[]".
    (re.compile(r'\[\d*\]'), ''),
    # Any remaining bracketed span (optionally two adjacent ones).
    (re.compile(r'(\[[^\]]*\])?\[[^\]]*\]'), ''),
    # All non-ASCII characters. This also removes every emoji, so no
    # separate emoji pass is needed afterwards.
    (re.compile(r'[^\x00-\x7F]+'), ''),
    # HTML tags (non-greedy, single line).
    (re.compile(r'<.*?>'), ''),
    # Bare URLs: "http" followed by any run of non-whitespace.
    (re.compile(r'http\S+'), ''),
)


def process_text(text):
    """Strip markup and noise from *text* before summarization.

    Applies, in order: Markdown image removal, Markdown link unwrapping
    (the link text is kept), citation/footnote and bracketed-span removal,
    non-ASCII removal, HTML tag removal and bare-URL removal. Finally,
    leading and trailing space characters are stripped.

    Removed spans are replaced by nothing, so the words around them may end
    up separated by more than one space.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string.
    """
    for pattern, replacement in _CLEANUP_STEPS:
        text = pattern.sub(replacement, text)
    # Only U+0020 is stripped, matching the original behaviour; newlines and
    # tabs at the edges are preserved.
    return text.strip(" ")
def summarize(article_en_raw):
    """Summarize an English document and return the summary translated.

    The raw text is cleaned with process_text(), summarized by the `pipe2`
    pipeline, and the resulting summary is passed through the translation
    model with generation forced to start from the "hi_IN" language token.

    Args:
        article_en_raw: The English source text as entered by the user.

    Returns:
        The decoded translation of the summary (first and only batch item).
    """
    cleaned_article = process_text(article_en_raw)

    # The pipeline returns a list with one dict per input; take its summary.
    english_summary = pipe2(cleaned_article)[0]['summary_text']

    encoded = tokenizer(english_summary, return_tensors="pt")
    target_lang_id = tokenizer.lang_code_to_id["hi_IN"]
    output_ids = model.generate(**encoded, forced_bos_token_id=target_lang_id)

    decoded = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    return decoded[0]
# UI components. `gr.Textbox` is used for both input and output: the legacy
# `gr.inputs` / `gr.outputs` namespaces are deprecated in Gradio 3.x and
# removed in 4.x, where accessing them raises AttributeError.
input_text = gr.Textbox(lines=20, label="Enter text document to be summarized")
output_text = gr.Textbox(label="Summarized Text")

# Single-function interface: the textbox content is passed to summarize()
# and its return value is shown in the output box.
gradio_interface = gr.Interface(
    fn=summarize,
    inputs=input_text,
    outputs=output_text,
    title="T5-Text Summarization App",
    description="Enter a text document and get its summarized version.",
)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    gradio_interface.launch()