File size: 2,614 Bytes
30bae69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import nltk
import ntlk_utils  # NLTK resources are downloaded in a separate file. NOTE(review): module name looks like a typo for "nltk_utils" — verify against the actual filename.
from nltk.corpus import wordnet as wn
from nltk.tokenize import sent_tokenize

import streamlit as st
import torch
from transformers import T5ForConditionalGeneration,T5Tokenizer

import time


# --- Streamlit page configuration and static header (runs on every rerun) ---
st.set_page_config(
    page_title = "Home",
)
# Title plus a long descriptive blurb shown above the input widgets.
st.title("NLP Shortcut")
st.subheader("ParaSummarize is an advanced Natural Language Processing (NLP) model tailored to simplify the process of digesting lengthy paragraphs. With ParaSummarize, complex texts are distilled into concise, coherent summaries with just a click. This invaluable tool empowers researchers, students, and professionals to save time and gain quick insights from extensive content.")

@st.cache_resource
def get_model():
    """Load the 't5-base' summarization model and its tokenizer.

    Decorated with st.cache_resource so the expensive download /
    initialisation happens only once per Streamlit server process;
    subsequent reruns reuse the same objects.

    Returns:
        tuple: (T5ForConditionalGeneration, T5Tokenizer) pair.
    """
    tok = T5Tokenizer.from_pretrained('t5-base')
    mdl = T5ForConditionalGeneration.from_pretrained('t5-base')
    return mdl, tok

# Materialise the cached model/tokenizer pair for this session.
summary_model,summary_tokenizer = get_model()

# Input widget and the trigger button; `button` is True only on the rerun
# in which it was clicked.
input_summary = st.text_area("Input the text to get the summary:",placeholder="Enter the text", height=200) # height in pixels
button = st.button("Press to summarise")

def postprocesstext (content):
  """Capitalize each sentence of *content* and rejoin them.

  Uses NLTK sentence tokenization. Every sentence — including the first —
  is prefixed with a single space, so the result carries a leading space
  (callers strip it). Returns "" for empty input.
  """
  return "".join(" " + s.capitalize() for s in sent_tokenize(content))

def summarizer(text,model,tokenizer):
  """Summarise *text* with a T5 sequence-to-sequence model.

  Args:
      text: Raw input text; newlines are flattened to spaces before the
          "summarize: " task prefix is prepended.
      model: A T5ForConditionalGeneration instance.
      tokenizer: The matching T5Tokenizer.

  Returns:
      str: The beam-searched summary, sentence-capitalized and stripped
      of surrounding whitespace.
  """
  text = "summarize: " + text.strip().replace("\n", " ")
  # Truncate to T5's 512-token encoder limit; a single sequence needs no
  # padding ('padding' supersedes the deprecated 'pad_to_max_length').
  encoding = tokenizer.encode_plus(text, max_length=512, padding=False,
                                   truncation=True, return_tensors="pt")

  outs = model.generate(input_ids=encoding["input_ids"],
                        attention_mask=encoding["attention_mask"],
                        early_stopping=True,
                        num_beams=3,
                        num_return_sequences=1,
                        no_repeat_ngram_size=2,
                        min_length = 75,
                        max_length=1000)

  # Only one sequence is requested, so decode just the first output.
  summary = tokenizer.decode(outs[0], skip_special_tokens=True)
  return postprocesstext(summary).strip()

# Run the summarizer only when there is input text AND the button was
# pressed on this rerun. (Removed an artificial time.sleep(5) that added
# five seconds of dead time before inference for no benefit.)
if input_summary and button:
    with st.spinner('Please wait...model is processing your input'):
        summarized_text = summarizer(input_summary,summary_model,summary_tokenizer)
    st.success("Success")
    st.balloons()
    st.write(summarized_text)