File size: 4,266 Bytes
ab5b1bd
 
7e76a61
ab5b1bd
 
 
 
27279fb
ab5b1bd
 
 
 
 
27279fb
ab5b1bd
27279fb
ab5b1bd
27279fb
ab5b1bd
 
 
27279fb
ab5b1bd
 
 
 
 
 
 
27279fb
 
ab5b1bd
27279fb
ab5b1bd
 
27279fb
7e76a61
 
 
 
 
 
 
31da57b
 
 
 
 
 
 
 
 
c974c12
ce29745
c974c12
ce29745
 
c974c12
 
ce29745
 
27279fb
ce29745
 
 
 
 
 
 
 
d02bf04
 
27279fb
 
d02bf04
ab5b1bd
46fbde0
ab5b1bd
46fbde0
ab5b1bd
46fbde0
 
 
ab5b1bd
46fbde0
 
 
 
 
 
ab5b1bd
d02bf04
fff6ca1
 
 
31da57b
d02bf04
fff6ca1
 
 
27279fb
fff6ca1
ab5b1bd
27279fb
ce29745
46fbde0
27279fb
fff6ca1
 
 
 
ab5b1bd
fff6ca1
13dd4d6
 
 
 
 
 
 
fff6ca1
ab5b1bd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# import torch
# from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# model_name = 'tuner007/pegasus_paraphrase'
# torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
# tokenizer = PegasusTokenizer.from_pretrained(model_name)
# model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)

# def get_response(input_text,num_return_sequences):
#   batch = tokenizer.prepare_seq2seq_batch([input_text],truncation=True,padding='longest',max_length=60, return_tensors="pt").to(torch_device)
#   translated = model.generate(**batch,max_length=60,num_beams=10, num_return_sequences=num_return_sequences, temperature=1.5)
#   tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
#   return tgt_text

# from sentence_splitter import SentenceSplitter, split_text_into_sentences

# splitter = SentenceSplitter(language='en')

# def paraphraze(text):
#   sentence_list = splitter.split(text)
#   paraphrase = []

#   for i in sentence_list:
#     a = get_response(i,1)
#     paraphrase.append(a)
#     paraphrase2 = [' '.join(x) for x in paraphrase]
#     paraphrase3 = [' '.join(x for x in paraphrase2) ]
#   paraphrased_text = str(paraphrase3).strip('[]').strip("'")
#   return paraphrased_text


# def summarize(text):

#   paraphrased_text = paraphraze(text)
#   return paraphrased_text
########################################################################################################

import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

import streamlit as st
#from .paraphraser import get_paraphrased_sentences, model, tokenizer

# Streamlit chrome tweaks: hide the hamburger menu and the footer, and shrink
# the top padding of the main content area.
# NOTE(review): the original padding-top rule was missing its closing brace,
# leaving the declaration (and the </style> tag) inside a broken CSS rule.
hide_main_and_footer_style = """
        <style>
        #MainMenu {visibility: hidden;} footer{visibility: hidden;}
        #root>div:nth-child(1)>div>div>div>div>section>div{padding-top: .2rem;}
        </style>
        """

st.markdown(hide_main_and_footer_style, unsafe_allow_html=True)


# Load the paraphrasing model and its fast tokenizer once at module import.
# NOTE(review): replaced `from transformers import *` with an explicit import —
# PegasusTokenizerFast was the only name this module used from the wildcard
# (PegasusForConditionalGeneration is already imported explicitly above).
from transformers import PegasusTokenizerFast

model = PegasusForConditionalGeneration.from_pretrained("tuner007/pegasus_paraphrase")
tokenizer = PegasusTokenizerFast.from_pretrained("tuner007/pegasus_paraphrase")


def get_paraphrased_sentences(model, tokenizer, sentence, num_return_sequences=5, num_beams=5):
    """Paraphrase *sentence* with a seq2seq model via beam search.

    Args:
        model: seq2seq model exposing ``generate(**encoded, ...)``.
        tokenizer: matching tokenizer (callable, with ``batch_decode``).
        sentence: input text to paraphrase.
        num_return_sequences: number of paraphrases to return.
        num_beams: beam width (should be >= ``num_return_sequences``).

    Returns:
        List of decoded paraphrase strings, one per returned beam.
    """
    # Encode the single sentence as a batch of one.
    encoded = tokenizer([sentence], truncation=True, padding="longest", return_tensors="pt")
    # Beam-search generation; one output sequence per requested paraphrase.
    generated = model.generate(
        **encoded,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )
    # Decode token IDs back to plain text.
    return tokenizer.batch_decode(generated, skip_special_tokens=True)


import streamlit as st
#from .paraphraser import get_paraphrased_sentences, model, tokenizer

#######
# from sentence_splitter import SentenceSplitter, split_text_into_sentences

# splitter = SentenceSplitter(language='en')

# def clean(text):
#   sentence_list = splitter.split(text)
#   paraphrase = []

#   for i in sentence_list:
#     paraphrase.append(i)
#     paraphrase2 = [' '.join(x) for x in paraphrase]
#     paraphrase3 = [' '.join(x for x in paraphrase2) ]
#   paraphrased_text = str(paraphrase3).strip('[]').strip("'")
#   return paraphrased_text


def app():
    """Render the paraphraser page: an input column and a results column.

    Reads the module-level ``model`` and ``tokenizer``; all other state is
    local to the Streamlit rerun.
    """
    st.title('Paraphraser')
    st.write('Please provide the text to be paraphrased')
    col1, spacer, col2 = st.columns([5, 1, 5])

    # Placeholder shown in the results column before the first run.
    output = ['Result ']
    with col1:
        user_input = st.text_area('Enter text', '', height=300)

        # NOTE(review): fixed user-facing typo "Parapharases" -> "Paraphrases".
        paraphrase_count = st.slider('Number of Paraphrases', 1, 10, 2)
        if st.button('Paraphrase'):
            with st.spinner(text="This may take a moment..."):
                output = get_paraphrased_sentences(
                    model,
                    tokenizer,
                    user_input,
                    num_beams=10,
                    num_return_sequences=paraphrase_count,
                )

    with col2:
        # One text area per paraphrase. The original indexed output[x] instead
        # of using the enumerated element, and gave every widget the same empty
        # label with no key — identical paraphrases would then collide with a
        # Streamlit DuplicateWidgetID error; an explicit key per widget fixes that.
        for idx, paraphrase in enumerate(output):
            st.text_area(label="", value=paraphrase, height=200, key=f"paraphrase_{idx}")

# st.markdown(
#     '''<style>
#     #MainMenu{visibility: hidden;} footer{visibility: hidden;}
#     #root>div:nth-child(1)>div>div>div>div>section>div{padding-top: .2rem;
#     </style>''', unsafe_allow_html=True
# )

# Script entry point: render the page when executed directly (streamlit run).
if __name__ == '__main__':
	app()