"""ParaSummarize: Streamlit app that condenses long paragraphs into short
summaries using a T5 model."""

import time  # kept from original; no longer used after removing the artificial delay

import nltk  # NLTK data (punkt tokenizer) is downloaded in a different file
import ntlk_utils  # NOTE(review): likely a typo for "nltk_utils" — confirm the module name
import streamlit as st
import torch
from nltk.corpus import wordnet as wn  # unused here; retained from original import list
from nltk.tokenize import sent_tokenize
from transformers import T5ForConditionalGeneration, T5Tokenizer

st.set_page_config(
    page_title="Home",
)
st.title("NLP Shortcut")
st.subheader("ParaSummarize is an advanced Natural Language Processing (NLP) model tailored to simplify the process of digesting lengthy paragraphs. With ParaSummarize, complex texts are distilled into concise, coherent summaries with just a click. This invaluable tool empowers researchers, students, and professionals to save time and gain quick insights from extensive content.")


@st.cache_resource
def get_model():
    """Load and cache the T5 summarization model and tokenizer.

    Cached via st.cache_resource so the model is loaded once per server
    process, not on every Streamlit rerun.

    Returns:
        tuple: (T5ForConditionalGeneration, T5Tokenizer) for 't5-base'.
    """
    summary_model = T5ForConditionalGeneration.from_pretrained('t5-base')
    summary_tokenizer = T5Tokenizer.from_pretrained('t5-base')
    return summary_model, summary_tokenizer


summary_model, summary_tokenizer = get_model()

input_summary = st.text_area(
    "Input the text to get the summary:",
    placeholder="Enter the text",
    height=200,  # height in pixels
)
button = st.button("Press to summarise")


def postprocesstext(content):
    """Uppercase the first character of every sentence in *content*.

    FIX: the original used str.capitalize(), which also LOWERCASES the rest
    of each sentence, mangling proper nouns and acronyms (e.g. "NLP" ->
    "nlp"). Now only the first character is uppercased.

    Args:
        content: raw summary text produced by the model.

    Returns:
        str: the sentences re-joined, each prefixed with a single space
        (callers strip the result).
    """
    final = ""
    for sent in sent_tokenize(content):
        if sent:
            sent = sent[0].upper() + sent[1:]
        final = final + " " + sent
    return final


def summarizer(text, model, tokenizer):
    """Summarize *text* with the given T5 model and tokenizer.

    Args:
        text: input paragraph(s) to summarize.
        model: a T5ForConditionalGeneration instance.
        tokenizer: the matching T5Tokenizer.

    Returns:
        str: a stripped, sentence-capitalized summary (at least ~75 tokens,
        beam search with 3 beams).
    """
    text = text.strip().replace("\n", " ")
    text = "summarize: " + text  # T5 task prefix
    max_len = 512  # T5 encoder input limit; longer inputs are truncated
    # FIX: `pad_to_max_length` is deprecated/removed in recent transformers
    # releases; `padding=False` is the supported equivalent.
    encoding = tokenizer.encode_plus(
        text,
        max_length=max_len,
        padding=False,
        truncation=True,
        return_tensors="pt",
    )
    input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]
    # FIX: inference only — disable gradient tracking to cut memory use.
    with torch.no_grad():
        outs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            early_stopping=True,
            num_beams=3,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            min_length=75,
            max_length=1000,
        )
    dec = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
    summary = postprocesstext(dec[0])
    return summary.strip()


if input_summary and button:
    with st.spinner('Please wait...model is processing your input'):
        # FIX: removed the artificial time.sleep(5) — it added 5 seconds of
        # latency to every request on top of actual inference time.
        summarized_text = summarizer(input_summary, summary_model, summary_tokenizer)
        st.success("Success")
        st.balloons()
        st.write(summarized_text)