segestic committed on
Commit
27279fb
·
1 Parent(s): ce29745

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -45
app.py CHANGED
@@ -1,4 +1,39 @@
 
 
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import torch
4
  from transformers import PegasusForConditionalGeneration, PegasusTokenizer
@@ -15,39 +50,6 @@ hide_main_and_footer_style = """
15
 
16
  st.markdown(hide_main_and_footer_style, unsafe_allow_html=True)
17
 
18
- model_name = 'tuner007/pegasus_paraphrase'
19
- torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
20
- tokenizer = PegasusTokenizer.from_pretrained(model_name)
21
- model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
22
-
23
- def get_response(input_text,num_return_sequences):
24
- batch = tokenizer.prepare_seq2seq_batch([input_text],truncation=True,padding='longest',max_length=60, return_tensors="pt").to(torch_device)
25
- translated = model.generate(**batch,max_length=60,num_beams=10, num_return_sequences=num_return_sequences, temperature=1.5)
26
- tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
27
- return tgt_text
28
-
29
- from sentence_splitter import SentenceSplitter, split_text_into_sentences
30
-
31
- splitter = SentenceSplitter(language='en')
32
-
33
- def paraphraze(text, how_many=1):
34
- sentence_list = splitter.split(text)
35
- paraphrase = []
36
-
37
- for i in sentence_list:
38
- a = get_response(i,how_many)
39
- paraphrase.append(a)
40
- paraphrase2 = [' '.join(x) for x in paraphrase]
41
- paraphrase3 = [' '.join(x for x in paraphrase2) ]
42
- paraphrased_text = paraphrase #str(paraphrase3).strip('[]').strip("'")
43
- return paraphrased_text
44
-
45
-
46
- def summarize(text):
47
-
48
- paraphrased_text = paraphraze(text)
49
- return paraphrased_text
50
- ########################################################################################################
51
 
52
  from transformers import *
53
 
@@ -57,7 +59,7 @@ tokenizer = PegasusTokenizerFast.from_pretrained("tuner007/pegasus_paraphrase")
57
 
58
  def get_paraphrased_sentences(model, tokenizer, sentence, num_return_sequences=5, num_beams=5):
59
  # tokenize the text to be form of a list of token IDs
60
- inputs = tokenizer([sentence], truncation=False, padding="longest", return_tensors="pt")
61
  # generate the paraphrased sentences
62
  outputs = model.generate(
63
  **inputs,
@@ -68,6 +70,8 @@ def get_paraphrased_sentences(model, tokenizer, sentence, num_return_sequences=5
68
  return tokenizer.batch_decode(outputs, skip_special_tokens=True)
69
 
70
 
 
 
71
 
72
 
73
  def app():
@@ -78,24 +82,18 @@ def app():
78
  x = 0
79
  output = ['Result ']
80
  with col1:
81
- user_input = st.text_area('Enter text','', height=200)
82
 
83
  paraphraseNo = st.slider('Number of Parapharases',1,10,2)
84
- if st.button('Single-Paraphrase'):
85
- with st.spinner(text="This may take a moment..."):
86
- output = summarize(user_input)
87
-
88
- if st.button('Multiple-Paraphrase'):
89
  with st.spinner(text="This may take a moment..."):
90
  output = get_paraphrased_sentences(model, tokenizer, user_input, num_beams=10, num_return_sequences=paraphraseNo)
91
-
92
-
93
-
94
  #with spacer:
95
 
96
  with col2:
97
  for x, element in enumerate(output):
98
- user_output = st.text_area(label="", value=output[x], height=150 )
99
 
100
  # st.markdown(
101
  # '''<style>
@@ -104,7 +102,6 @@ def app():
104
  # </style>''', unsafe_allow_html=True
105
  # )
106
 
107
-
108
  if __name__ == '__main__':
109
  app()
110
 
 
1
+ # import torch
2
+ # from transformers import PegasusForConditionalGeneration, PegasusTokenizer
3
 
4
+ # model_name = 'tuner007/pegasus_paraphrase'
5
+ # torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
6
+ # tokenizer = PegasusTokenizer.from_pretrained(model_name)
7
+ # model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
8
+
9
+ # def get_response(input_text,num_return_sequences):
10
+ # batch = tokenizer.prepare_seq2seq_batch([input_text],truncation=True,padding='longest',max_length=60, return_tensors="pt").to(torch_device)
11
+ # translated = model.generate(**batch,max_length=60,num_beams=10, num_return_sequences=num_return_sequences, temperature=1.5)
12
+ # tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
13
+ # return tgt_text
14
+
15
+ # from sentence_splitter import SentenceSplitter, split_text_into_sentences
16
+
17
+ # splitter = SentenceSplitter(language='en')
18
+
19
+ # def paraphraze(text):
20
+ # sentence_list = splitter.split(text)
21
+ # paraphrase = []
22
+
23
+ # for i in sentence_list:
24
+ # a = get_response(i,1)
25
+ # paraphrase.append(a)
26
+ # paraphrase2 = [' '.join(x) for x in paraphrase]
27
+ # paraphrase3 = [' '.join(x for x in paraphrase2) ]
28
+ # paraphrased_text = str(paraphrase3).strip('[]').strip("'")
29
+ # return paraphrased_text
30
+
31
+
32
+ # def summarize(text):
33
+
34
+ # paraphrased_text = paraphraze(text)
35
+ # return paraphrased_text
36
+ ########################################################################################################
37
 
38
  import torch
39
  from transformers import PegasusForConditionalGeneration, PegasusTokenizer
 
50
 
51
  st.markdown(hide_main_and_footer_style, unsafe_allow_html=True)
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  from transformers import *
55
 
 
59
 
60
  def get_paraphrased_sentences(model, tokenizer, sentence, num_return_sequences=5, num_beams=5):
61
  # tokenize the text to be form of a list of token IDs
62
+ inputs = tokenizer([sentence], truncation=True, padding="longest", return_tensors="pt")
63
  # generate the paraphrased sentences
64
  outputs = model.generate(
65
  **inputs,
 
70
  return tokenizer.batch_decode(outputs, skip_special_tokens=True)
71
 
72
 
73
+ import streamlit as st
74
+ #from .paraphraser import get_paraphrased_sentences, model, tokenizer
75
 
76
 
77
  def app():
 
82
  x = 0
83
  output = ['Result ']
84
  with col1:
85
+ user_input = st.text_area('Enter text','', height=300)
86
 
87
  paraphraseNo = st.slider('Number of Parapharases',1,10,2)
88
+ if st.button('Paraphrase'):
 
 
 
 
89
  with st.spinner(text="This may take a moment..."):
90
  output = get_paraphrased_sentences(model, tokenizer, user_input, num_beams=10, num_return_sequences=paraphraseNo)
91
+
 
 
92
  #with spacer:
93
 
94
  with col2:
95
  for x, element in enumerate(output):
96
+ user_output = st.text_area(label="", value=output[x], height=200 )
97
 
98
  # st.markdown(
99
  # '''<style>
 
102
  # </style>''', unsafe_allow_html=True
103
  # )
104
 
 
105
  if __name__ == '__main__':
106
  app()
107