Automate_MCQ / app.py
PankajNk's picture
Create app.py
30bae69
import nltk
import ntlk_utils #nltk are download in different file
from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize
import streamlit as st
import torch
from transformers import T5ForConditionalGeneration,T5Tokenizer
import time
# Set the browser-tab title for this page.
st.set_page_config(page_title="Home")

# Visible heading, followed by a short description of what the app does.
st.title("NLP Shortcut")
st.subheader(
    "ParaSummarize is an advanced Natural Language Processing (NLP) model tailored to simplify the process of digesting lengthy paragraphs. With ParaSummarize, complex texts are distilled into concise, coherent summaries with just a click. This invaluable tool empowers researchers, students, and professionals to save time and gain quick insights from extensive content."
)
@st.cache_resource
def get_model():
    """Load the pretrained ``t5-base`` model together with its tokenizer.

    The function is wrapped in ``st.cache_resource`` so the loaded objects
    can be reused instead of being reloaded on every script run.

    Returns:
        tuple: ``(model, tokenizer)`` in that order.
    """
    checkpoint = 't5-base'
    model = T5ForConditionalGeneration.from_pretrained(checkpoint)
    tokenizer = T5Tokenizer.from_pretrained(checkpoint)
    return model, tokenizer
# Obtain the shared model/tokenizer pair used by the summarizer below.
summary_model, summary_tokenizer = get_model()

# Text box for the passage to summarize, plus the button that triggers the run.
input_summary = st.text_area(
    "Input the text to get the summary:",
    height=200,  # height in pixels
    placeholder="Enter the text",
)
button = st.button("Press to summarise")
def postprocesstext (content):
    """Capitalize every sentence of *content* and glue them back together.

    The text is split with ``sent_tokenize``; each sentence is passed through
    ``str.capitalize`` (first character upper-cased, the rest lower-cased) and
    prefixed with a single space, so a non-empty result starts with a space.

    Args:
        content: Text to post-process.

    Returns:
        The re-joined text, or an empty string when no sentences are found.
    """
    return "".join(" " + sentence.capitalize() for sentence in sent_tokenize(content))
def summarizer(text,model,tokenizer):
    """Summarize *text* with a T5 model and return the cleaned-up summary.

    Args:
        text: Raw input text. Surrounding whitespace is stripped and newlines
            are replaced by spaces before the text is sent to the model.
        model: Sequence-to-sequence model exposing ``generate``.
        tokenizer: Tokenizer matching *model*; it is called on the prompt and
            its ``decode`` method is used on the generated ids.

    Returns:
        The generated summary, post-processed by ``postprocesstext`` and
        stripped. An empty string is returned when *text* is empty or
        contains only whitespace, without invoking the model.
    """
    text = text.strip().replace("\n", " ")
    if not text:
        # Nothing to summarize: generating from the bare prompt would only
        # produce a made-up summary (min_length forces at least 75 tokens).
        return ""

    # T5 selects the task from a textual prefix on the input.
    prompt = "summarize: " + text

    # Inputs longer than 512 tokens are truncated. Calling the tokenizer
    # directly replaces the deprecated encode_plus/pad_to_max_length form;
    # no padding is needed for a single sequence.
    max_len = 512
    encoding = tokenizer(
        prompt,
        max_length=max_len,
        truncation=True,
        return_tensors="pt",
    )

    outs = model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        early_stopping=True,
        num_beams=3,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        min_length=75,
        max_length=1000,
    )

    # Only one sequence is requested, so the first output is the summary.
    summary = tokenizer.decode(outs[0], skip_special_tokens=True)
    return postprocesstext(summary).strip()
# Run the summarizer only when the button was pressed AND the text box holds
# something other than whitespace.
has_text = bool(input_summary and input_summary.strip())
if button and has_text:
    # The spinner is shown for exactly as long as the model is working; no
    # artificial delay is added on top of the real processing time.
    with st.spinner('Please wait...model is processing your input'):
        summarized_text = summarizer(input_summary, summary_model, summary_tokenizer)
    st.success("Success")
    st.balloons()
    st.write(summarized_text)
elif button:
    # Button pressed with an empty text box: tell the user instead of
    # silently doing nothing.
    st.warning("Please enter some text to summarise.")