|
|
from transformers import PegasusTokenizer, PegasusForConditionalGeneration
|
|
|
|
|
|
|
|
|
# Hugging Face Hub id of the PEGASUS checkpoint fine-tuned for paraphrasing.
model_name = "tuner007/pegasus_paraphrase"

# Lazily-initialized singleton tokenizer; populated on first call to
# _load_paraphrase_resources() so importing this module stays cheap.
_tokenizer = None

# Lazily-initialized singleton model; populated alongside _tokenizer.
_model = None
|
|
|
|
|
|
|
|
|
def _load_paraphrase_resources():
    """Return the shared ``(tokenizer, model)`` pair, loading it on first use.

    The pretrained PEGASUS paraphrase checkpoint is fetched only once per
    process; later calls reuse the module-level singletons.
    """
    global _tokenizer, _model

    # Fast path: both singletons already initialized.
    if _tokenizer is not None and _model is not None:
        return _tokenizer, _model

    # First use (or partial initialization): (re)load both resources.
    _tokenizer = PegasusTokenizer.from_pretrained(model_name)
    _model = PegasusForConditionalGeneration.from_pretrained(model_name)
    return _tokenizer, _model
|
|
|
|
|
|
def paraphrase(text, num_return_sequences=3, num_beams=5, max_length=128):
    """Generate paraphrases of *text* with the PEGASUS paraphrase model.

    Args:
        text: Input sentence to paraphrase. Long inputs are truncated to the
            tokenizer's model maximum length.
        num_return_sequences: Number of paraphrase candidates to return
            (must be <= ``num_beams``). Defaults to 3, matching the original
            hard-coded behavior.
        num_beams: Beam-search width used during generation. Defaults to 5.
        max_length: Maximum token length of each generated paraphrase.
            Defaults to 128.

    Returns:
        A list of ``num_return_sequences`` paraphrased strings.

    Raises:
        RuntimeError: If tokenization or generation fails; the underlying
            exception is attached as ``__cause__``.
    """
    tokenizer, model = _load_paraphrase_resources()

    try:
        # Encode once; truncation guards against inputs longer than the
        # model's positional limit.
        input_ids = tokenizer.encode(text, return_tensors='pt', truncation=True)

        outputs = model.generate(
            input_ids=input_ids,
            num_beams=num_beams,
            num_return_sequences=num_return_sequences,
            max_length=max_length,
        )

        # Strip special tokens so callers get plain sentences.
        return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
    except Exception as exc:
        # Surface a single, stable exception type to callers while keeping
        # the original failure chained for debugging.
        raise RuntimeError("Paraphrasing failed") from exc
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo: paraphrase one sample sentence and print the candidates.
    input_text = "The study investigates the correlation between socioeconomic status and academic achievement."

    paraphrased_sentences = paraphrase(input_text)

    print(f"Original sentence: {input_text}")
    print("\nParaphrased sentences:")

    # Number the candidates starting from 1 for display.
    for rank, sentence in enumerate(paraphrased_sentences, start=1):
        print(f"{rank}. {sentence}")