segestic committed on
Commit
bbc9b64
·
1 Parent(s): cd12ba4

Create paraphraser.py

Browse files
Files changed (1) hide show
  1. paraphraser.py +17 -0
paraphraser.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import *
2
+
3
# Single source of truth for the checkpoint identifier, so the model and the
# tokenizer are always loaded from the same pretrained checkpoint.
_CHECKPOINT = "tuner007/pegasus_paraphrase"

# Loaded once at import time; both calls may download/cache the checkpoint.
model = PegasusForConditionalGeneration.from_pretrained(_CHECKPOINT)
tokenizer = PegasusTokenizerFast.from_pretrained(_CHECKPOINT)
5
+
6
+
7
def get_paraphrased_sentences(model, tokenizer, sentence, num_return_sequences=5, num_beams=5):
    """Return paraphrases of ``sentence`` generated by ``model``.

    Args:
        model: Object exposing ``generate(**inputs, num_beams=...,
            num_return_sequences=...)`` and a ``device`` attribute, e.g. the
            Pegasus model loaded in this file.
        tokenizer: Callable that encodes a list of texts and provides
            ``batch_decode``; the encoding it returns must support
            ``.to(device)`` and ``**`` unpacking.
        sentence: The text to paraphrase.
        num_return_sequences: How many generated sequences to request.
        num_beams: Beam width passed to ``model.generate``.
            NOTE(review): Hugging Face beam search is expected to reject
            ``num_return_sequences > num_beams`` -- confirm against the
            installed transformers version.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated sequences,
        decoded with special tokens skipped.
    """
    # Encode the sentence as a batch of one, requesting PyTorch tensors.
    inputs = tokenizer([sentence], truncation=True, padding="longest", return_tensors="pt")
    # Keep the inputs on the same device as the model; otherwise generation
    # fails as soon as the caller moves the model off the CPU.
    inputs = inputs.to(model.device)
    # Generate the paraphrased sequences.
    outputs = model.generate(
        **inputs,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
    )
    # Decode the generated token IDs back to text.
    return tokenizer.batch_decode(outputs, skip_special_tokens=True)