# segestic — Update app.py (commit 46fbde0)
# import torch
# from transformers import PegasusForConditionalGeneration, PegasusTokenizer
# model_name = 'tuner007/pegasus_paraphrase'
# torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
# tokenizer = PegasusTokenizer.from_pretrained(model_name)
# model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
# def get_response(input_text,num_return_sequences):
# batch = tokenizer.prepare_seq2seq_batch([input_text],truncation=True,padding='longest',max_length=60, return_tensors="pt").to(torch_device)
# translated = model.generate(**batch,max_length=60,num_beams=10, num_return_sequences=num_return_sequences, temperature=1.5)
# tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
# return tgt_text
# from sentence_splitter import SentenceSplitter, split_text_into_sentences
# splitter = SentenceSplitter(language='en')
# def paraphraze(text):
# sentence_list = splitter.split(text)
# paraphrase = []
# for i in sentence_list:
# a = get_response(i,1)
# paraphrase.append(a)
# paraphrase2 = [' '.join(x) for x in paraphrase]
# paraphrase3 = [' '.join(x for x in paraphrase2) ]
# paraphrased_text = str(paraphrase3).strip('[]').strip("'")
# return paraphrased_text
# def summarize(text):
# paraphrased_text = paraphraze(text)
# return paraphrased_text
########################################################################################################
import torch
# Explicit imports instead of the original `from transformers import *`
# wildcard: only these two classes are used below.
from transformers import (
    PegasusForConditionalGeneration,
    PegasusTokenizer,
    PegasusTokenizerFast,
)
import streamlit as st

# Hide Streamlit's default hamburger menu and footer, and tighten the top
# padding of the main content section.
# BUG FIX: the padding-top rule was missing its closing brace, which made
# the selector malformed CSS.
hide_main_and_footer_style = """
<style>
#MainMenu {visibility: hidden;} footer{visibility: hidden;}
#root>div:nth-child(1)>div>div>div>div>section>div{padding-top: .2rem;}
</style>
"""
st.markdown(hide_main_and_footer_style, unsafe_allow_html=True)

# Load the paraphrasing model and tokenizer once at module import.
# NOTE(review): consider @st.cache_resource so Streamlit does not reload
# the weights on every script rerun — TODO confirm before changing.
model = PegasusForConditionalGeneration.from_pretrained("tuner007/pegasus_paraphrase")
tokenizer = PegasusTokenizerFast.from_pretrained("tuner007/pegasus_paraphrase")
def get_paraphrased_sentences(model, tokenizer, sentence, num_return_sequences=5, num_beams=5):
    """Return a list of paraphrases of *sentence* from a seq2seq model.

    Args:
        model: a loaded seq2seq model exposing ``generate`` (here Pegasus).
        tokenizer: the tokenizer matching *model*.
        sentence: input text to paraphrase.
        num_return_sequences: number of paraphrases to return.
        num_beams: beam width for beam search (should be >= num_return_sequences).

    Returns:
        A list of ``num_return_sequences`` decoded paraphrase strings.
    """
    # Encode the input as a single-item batch of token-ID tensors.
    encoded = tokenizer([sentence], truncation=True, padding="longest", return_tensors="pt")
    # Beam-search decode the requested number of candidate sequences.
    generated_ids = model.generate(
        **encoded,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )
    # Map the generated token IDs back to plain text.
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
import streamlit as st
#from .paraphraser import get_paraphrased_sentences, model, tokenizer
#######
# from sentence_splitter import SentenceSplitter, split_text_into_sentences
# splitter = SentenceSplitter(language='en')
# def clean(text):
# sentence_list = splitter.split(text)
# paraphrase = []
# for i in sentence_list:
# paraphrase.append(i)
# paraphrase2 = [' '.join(x) for x in paraphrase]
# paraphrase3 = [' '.join(x for x in paraphrase2) ]
# paraphrased_text = str(paraphrase3).strip('[]').strip("'")
# return paraphrased_text
def app():
    """Render the Streamlit paraphraser UI.

    Left column: a text area plus a slider selecting how many paraphrases
    to generate; a button triggers the model. Right column: one read-only
    text area per generated paraphrase.
    """
    st.title('Paraphraser')
    st.write('Please provide the text to be paraphrased')
    col1, spacer, col2 = st.columns([5, 1, 5])
    # Placeholder shown in the output column before the first run.
    output = ['Result ']
    with col1:
        user_input = st.text_area('Enter text', '', height=300)
        paraphrase_count = st.slider('Number of Parapharases', 1, 10, 2)
        if st.button('Paraphrase'):
            if not user_input.strip():
                # Guard: avoid running the model on empty input.
                st.warning('Please enter some text first.')
            else:
                with st.spinner(text="This may take a moment..."):
                    output = get_paraphrased_sentences(
                        model,
                        tokenizer,
                        user_input,
                        num_beams=10,
                        num_return_sequences=paraphrase_count,
                    )
    with col2:
        for idx, paraphrase in enumerate(output):
            # BUG FIX: without a unique ``key``, several text areas with
            # identical parameters raise Streamlit's DuplicateWidgetID
            # error; also use the enumerated value directly instead of
            # re-indexing output[idx].
            st.text_area(label="", value=paraphrase, height=200, key=f"paraphrase_{idx}")


if __name__ == '__main__':
    app()