File size: 4,266 Bytes
ab5b1bd
 
7e76a61
ab5b1bd
 
 
 
27279fb
ab5b1bd
 
 
 
 
27279fb
ab5b1bd
27279fb
ab5b1bd
27279fb
ab5b1bd
 
 
27279fb
ab5b1bd
 
 
 
 
 
 
27279fb
 
ab5b1bd
27279fb
ab5b1bd
 
27279fb
7e76a61
 
 
 
 
 
 
31da57b
 
 
 
 
 
 
 
 
c974c12
ce29745
c974c12
ce29745
 
c974c12
 
ce29745
 
27279fb
ce29745
 
 
 
 
 
 
 
d02bf04
 
27279fb
 
d02bf04
ab5b1bd
46fbde0
ab5b1bd
46fbde0
ab5b1bd
46fbde0
 
 
ab5b1bd
46fbde0
 
 
 
 
 
ab5b1bd
d02bf04
fff6ca1
 
 
31da57b
d02bf04
fff6ca1
 
 
27279fb
fff6ca1
ab5b1bd
27279fb
ce29745
46fbde0
27279fb
fff6ca1
 
 
 
ab5b1bd
fff6ca1
13dd4d6
 
 
 
 
 
 
fff6ca1
ab5b1bd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# import torch
# from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# model_name = 'tuner007/pegasus_paraphrase'
# torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
# tokenizer = PegasusTokenizer.from_pretrained(model_name)
# model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)

# def get_response(input_text,num_return_sequences):
#   batch = tokenizer.prepare_seq2seq_batch([input_text],truncation=True,padding='longest',max_length=60, return_tensors="pt").to(torch_device)
#   translated = model.generate(**batch,max_length=60,num_beams=10, num_return_sequences=num_return_sequences, temperature=1.5)
#   tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
#   return tgt_text

# from sentence_splitter import SentenceSplitter, split_text_into_sentences

# splitter = SentenceSplitter(language='en')

# def paraphraze(text):
#   sentence_list = splitter.split(text)
#   paraphrase = []

#   for i in sentence_list:
#     a = get_response(i,1)
#     paraphrase.append(a)
#     paraphrase2 = [' '.join(x) for x in paraphrase]
#     paraphrase3 = [' '.join(x for x in paraphrase2) ]
#   paraphrased_text = str(paraphrase3).strip('[]').strip("'")
#   return paraphrased_text


# def summarize(text):

#   paraphrased_text = paraphraze(text)
#   return paraphrased_text
########################################################################################################

import torch
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

import streamlit as st
#from .paraphraser import get_paraphrased_sentences, model, tokenizer

# Streamlit chrome tweaks: hide the hamburger menu and the footer, and shrink
# the top padding of the main content area.
# NOTE(review): the original padding-top rule was missing its closing brace,
# leaving the declaration (and the </style> tag) inside a broken CSS rule.
hide_main_and_footer_style = """
        <style>
        #MainMenu {visibility: hidden;} footer{visibility: hidden;}
        #root>div:nth-child(1)>div>div>div>div>section>div{padding-top: .2rem;}
        </style>
        """

st.markdown(hide_main_and_footer_style, unsafe_allow_html=True)


# Load the paraphrasing model and its fast tokenizer once at module import.
# NOTE(review): replaced `from transformers import *` with an explicit import —
# PegasusTokenizerFast was the only name this module used from the wildcard
# (PegasusForConditionalGeneration is already imported explicitly above).
from transformers import PegasusTokenizerFast

model = PegasusForConditionalGeneration.from_pretrained("tuner007/pegasus_paraphrase")
tokenizer = PegasusTokenizerFast.from_pretrained("tuner007/pegasus_paraphrase")


def get_paraphrased_sentences(model, tokenizer, sentence, num_return_sequences=5, num_beams=5):
    """Paraphrase *sentence* with a seq2seq model via beam search.

    Args:
        model: seq2seq model exposing ``generate(**encoded, ...)``.
        tokenizer: matching tokenizer (callable, with ``batch_decode``).
        sentence: input text to paraphrase.
        num_return_sequences: number of paraphrases to return.
        num_beams: beam width (should be >= ``num_return_sequences``).

    Returns:
        List of decoded paraphrase strings, one per returned beam.
    """
    # Encode the single sentence as a batch of one.
    encoded = tokenizer([sentence], truncation=True, padding="longest", return_tensors="pt")
    # Beam-search generation; one output sequence per requested paraphrase.
    generated = model.generate(
        **encoded,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )
    # Decode token IDs back to plain text.
    return tokenizer.batch_decode(generated, skip_special_tokens=True)


import streamlit as st
#from .paraphraser import get_paraphrased_sentences, model, tokenizer

#######
# from sentence_splitter import SentenceSplitter, split_text_into_sentences

# splitter = SentenceSplitter(language='en')

# def clean(text):
#   sentence_list = splitter.split(text)
#   paraphrase = []

#   for i in sentence_list:
#     paraphrase.append(i)
#     paraphrase2 = [' '.join(x) for x in paraphrase]
#     paraphrase3 = [' '.join(x for x in paraphrase2) ]
#   paraphrased_text = str(paraphrase3).strip('[]').strip("'")
#   return paraphrased_text


def app():
    """Render the paraphraser page: an input column and a results column.

    Reads the module-level ``model`` and ``tokenizer``; all other state is
    local to the Streamlit rerun.
    """
    st.title('Paraphraser')
    st.write('Please provide the text to be paraphrased')
    col1, spacer, col2 = st.columns([5, 1, 5])

    # Placeholder shown in the results column before the first run.
    output = ['Result ']
    with col1:
        user_input = st.text_area('Enter text', '', height=300)

        # NOTE(review): fixed user-facing typo "Parapharases" -> "Paraphrases".
        paraphrase_count = st.slider('Number of Paraphrases', 1, 10, 2)
        if st.button('Paraphrase'):
            with st.spinner(text="This may take a moment..."):
                output = get_paraphrased_sentences(
                    model,
                    tokenizer,
                    user_input,
                    num_beams=10,
                    num_return_sequences=paraphrase_count,
                )

    with col2:
        # One text area per paraphrase. The original indexed output[x] instead
        # of using the enumerated element, and gave every widget the same empty
        # label with no key — identical paraphrases would then collide with a
        # Streamlit DuplicateWidgetID error; an explicit key per widget fixes that.
        for idx, paraphrase in enumerate(output):
            st.text_area(label="", value=paraphrase, height=200, key=f"paraphrase_{idx}")

# st.markdown(
#     '''<style>
#     #MainMenu{visibility: hidden;} footer{visibility: hidden;}
#     #root>div:nth-child(1)>div>div>div>div>section>div{padding-top: .2rem;
#     </style>''', unsafe_allow_html=True
# )

# Script entry point: render the page when executed directly (streamlit run).
if __name__ == '__main__':
	app()