# File size: 1,667 Bytes
# 595267b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# NOTE(review): a stray line-number gutter (1-45) from the original paste was
# removed here — the bare integers were no-op statements left by extraction.
from transformers import PegasusTokenizer, PegasusForConditionalGeneration

# 1. Lazy-load the PEGASUS paraphrasing model so imports stay quick
model_name = "tuner007/pegasus_paraphrase"
_tokenizer = None
_model = None


def _load_paraphrase_resources():
    """Lazily create and cache the PEGASUS tokenizer/model pair.

    Defers the slow pretrained-weight download/load until paraphrasing is
    actually requested, so importing this module stays fast. Subsequent
    calls return the already-cached objects.
    """
    global _tokenizer, _model
    if _tokenizer is None:
        _tokenizer = PegasusTokenizer.from_pretrained(model_name)
    if _model is None:
        _model = PegasusForConditionalGeneration.from_pretrained(model_name)
    return _tokenizer, _model

def paraphrase(text, num_return_sequences=3, max_length=128):
    """Generate paraphrase candidates for *text* using PEGASUS.

    Args:
        text: The sentence to paraphrase. The tuner007/pegasus_paraphrase
            checkpoint is tuned on single sentences, so pass one sentence
            at a time for best results.
        num_return_sequences: Number of paraphrase candidates to return
            (default 3, matching the original behavior).
        max_length: Maximum token length of each generated candidate.

    Returns:
        A list of ``num_return_sequences`` paraphrased strings.

    Raises:
        RuntimeError: If tokenization or generation fails; the underlying
            exception is chained as ``__cause__``.
    """
    # No task prefix is needed for this model.
    tokenizer, model = _load_paraphrase_resources()

    try:
        # Truncate over-long inputs instead of failing on them.
        input_ids = tokenizer.encode(text, return_tensors='pt', truncation=True)

        # Beam search must explore at least as many beams as the number of
        # sequences requested, otherwise generate() raises — keep the
        # original 5 beams but widen when the caller asks for more.
        outputs = model.generate(
            input_ids=input_ids,
            num_beams=max(5, num_return_sequences),
            num_return_sequences=num_return_sequences,
            max_length=max_length,
        )

        # Decode every returned beam into plain text.
        return [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

    except Exception as exc:
        raise RuntimeError("Paraphrasing failed") from exc

# 7. A better example sentence to test academic paraphrasing
if __name__ == "__main__":
    input_text = "The study investigates the correlation between socioeconomic status and academic achievement."
    candidates = paraphrase(input_text)

    print(f"Original sentence: {input_text}")
    print("\nParaphrased sentences:")
    # enumerate from 1 so the printed numbering starts at "1."
    for rank, candidate in enumerate(candidates, start=1):
        print(f"{rank}. {candidate}")