segestic committed on
Commit
ab5b1bd
·
1 Parent(s): 026cdc5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -32
app.py CHANGED
@@ -1,38 +1,38 @@
1
- import torch
2
- from transformers import PegasusForConditionalGeneration, PegasusTokenizer
3
 
4
- model_name = 'tuner007/pegasus_paraphrase'
5
- torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
6
- tokenizer = PegasusTokenizer.from_pretrained(model_name)
7
- model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
8
 
9
- def get_response(input_text,num_return_sequences):
10
- batch = tokenizer.prepare_seq2seq_batch([input_text],truncation=True,padding='longest',max_length=60, return_tensors="pt").to(torch_device)
11
- translated = model.generate(**batch,max_length=60,num_beams=10, num_return_sequences=num_return_sequences, temperature=1.5)
12
- tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
13
- return tgt_text
14
 
15
- from sentence_splitter import SentenceSplitter, split_text_into_sentences
16
 
17
- splitter = SentenceSplitter(language='en')
18
 
19
- def paraphraze(text):
20
- sentence_list = splitter.split(text)
21
- paraphrase = []
22
 
23
- for i in sentence_list:
24
- a = get_response(i,1)
25
- paraphrase.append(a)
26
- paraphrase2 = [' '.join(x) for x in paraphrase]
27
- paraphrase3 = [' '.join(x for x in paraphrase2) ]
28
- paraphrased_text = str(paraphrase3).strip('[]').strip("'")
29
- return paraphrased_text
30
 
31
 
32
- def summarize(text):
33
 
34
- paraphrased_text = paraphraze(text)
35
- return paraphrased_text
36
  ########################################################################################################
37
 
38
  import torch
@@ -73,6 +73,22 @@ def get_paraphrased_sentences(model, tokenizer, sentence, num_return_sequences=5
73
  import streamlit as st
74
  #from .paraphraser import get_paraphrased_sentences, model, tokenizer
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  def app():
78
  st.title('Paraphraser')
@@ -84,17 +100,16 @@ def app():
84
  with col1:
85
  user_input = st.text_area('Enter text','', height=300)
86
 
87
- #paraphraseNo = st.slider('Number of Parapharases',1,10,2)
88
  if st.button('Paraphrase'):
89
  with st.spinner(text="This may take a moment..."):
90
-
91
- output = summarize(user_input)
92
 
93
  #with spacer:
94
 
95
  with col2:
96
  for x, element in enumerate(output):
97
- user_output = st.text_area(label="", value=output, height=200 )
98
 
99
  # st.markdown(
100
  # '''<style>
@@ -104,5 +119,4 @@ def app():
104
  # )
105
 
106
  if __name__ == '__main__':
107
- app()
108
-
 
1
+ # import torch
2
+ # from transformers import PegasusForConditionalGeneration, PegasusTokenizer
3
 
4
+ # model_name = 'tuner007/pegasus_paraphrase'
5
+ # torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
6
+ # tokenizer = PegasusTokenizer.from_pretrained(model_name)
7
+ # model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
8
 
9
+ # def get_response(input_text,num_return_sequences):
10
+ # batch = tokenizer.prepare_seq2seq_batch([input_text],truncation=True,padding='longest',max_length=60, return_tensors="pt").to(torch_device)
11
+ # translated = model.generate(**batch,max_length=60,num_beams=10, num_return_sequences=num_return_sequences, temperature=1.5)
12
+ # tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
13
+ # return tgt_text
14
 
15
+ # from sentence_splitter import SentenceSplitter, split_text_into_sentences
16
 
17
+ # splitter = SentenceSplitter(language='en')
18
 
19
+ # def paraphraze(text):
20
+ # sentence_list = splitter.split(text)
21
+ # paraphrase = []
22
 
23
+ # for i in sentence_list:
24
+ # a = get_response(i,1)
25
+ # paraphrase.append(a)
26
+ # paraphrase2 = [' '.join(x) for x in paraphrase]
27
+ # paraphrase3 = [' '.join(x for x in paraphrase2) ]
28
+ # paraphrased_text = str(paraphrase3).strip('[]').strip("'")
29
+ # return paraphrased_text
30
 
31
 
32
+ # def summarize(text):
33
 
34
+ # paraphrased_text = paraphraze(text)
35
+ # return paraphrased_text
36
  ########################################################################################################
37
 
38
  import torch
 
73
  import streamlit as st
74
  #from .paraphraser import get_paraphrased_sentences, model, tokenizer
75
 
76
+ #######
77
+ from sentence_splitter import SentenceSplitter, split_text_into_sentences
78
+
79
+ splitter = SentenceSplitter(language='en')
80
+
81
def clean(text):
    """Normalize *text* into a single line of space-separated sentences.

    The text is split into sentences with the module-level ``splitter``
    and the sentences are re-joined with single spaces.

    Args:
        text: Raw user input; may contain line breaks or be empty.

    Returns:
        The sentences joined by single spaces, or ``''`` when *text* is
        empty or ``None``.
    """
    if not text:
        return ''

    # Each item returned by ``splitter.split`` is treated as one sentence
    # string. The previous code ran ``' '.join(x)`` on every sentence, which
    # inserted a space between every character, and then round-tripped the
    # result through ``str(list)``, leaking repr quoting/escaping into the
    # output. Join the sentences directly instead.
    # NOTE(review): blank fragments are dropped on the assumption that the
    # splitter may emit empty strings for blank lines -- confirm.
    sentences = (sentence.strip() for sentence in splitter.split(text))
    return ' '.join(sentence for sentence in sentences if sentence)
91
+
92
 
93
  def app():
94
  st.title('Paraphraser')
 
100
  with col1:
101
  user_input = st.text_area('Enter text','', height=300)
102
 
103
+ paraphraseNo = st.slider('Number of Parapharases',1,10,2)
104
  if st.button('Paraphrase'):
105
  with st.spinner(text="This may take a moment..."):
106
+ output = get_paraphrased_sentences(model, tokenizer, clean(user_input), num_beams=10, num_return_sequences=paraphraseNo)
 
107
 
108
  #with spacer:
109
 
110
  with col2:
111
  for x, element in enumerate(output):
112
+ user_output = st.text_area(label="", value=output[x], height=200 )
113
 
114
  # st.markdown(
115
  # '''<style>
 
119
  # )
120
 
121
  if __name__ == '__main__':
122
+ app()