VictorM-Coder committed on
Commit
432272a
·
verified ·
1 Parent(s): 29fe724

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -5
app.py CHANGED
@@ -20,23 +20,27 @@ def paraphrase_t5(text, temperature=0.9, top_p=0.92):
20
  paraphrased_paragraphs = []
21
 
22
  for p in paragraphs:
23
- inputs = tokenizer([f"paraphrase: {p}"], return_tensors="pt", truncation=True, padding=True).to(device)
 
 
 
24
 
25
  outputs = model.generate(
26
  **inputs,
27
  max_new_tokens=256,
28
  do_sample=True,
29
- top_p=float(top_p), # nucleus sampling
30
- temperature=float(temperature), # creativity
31
- num_return_sequences=1
 
32
  )
33
 
34
  paraphrased = tokenizer.decode(outputs[0], skip_special_tokens=True)
35
  paraphrased_paragraphs.append(paraphrased)
36
 
37
- # Join paraphrased paragraphs with newlines
38
  return "\n\n".join(paraphrased_paragraphs)
39
 
 
40
  # Gradio UI
41
  iface = gr.Interface(
42
  fn=paraphrase_t5,
 
20
  paraphrased_paragraphs = []
21
 
22
  for p in paragraphs:
23
+ # Stronger paraphrasing prompt
24
+ prompt = f"Paraphrase this in a more natural, human style while keeping meaning:\n{p}"
25
+
26
+ inputs = tokenizer([prompt], return_tensors="pt", truncation=True, padding=True).to(device)
27
 
28
  outputs = model.generate(
29
  **inputs,
30
  max_new_tokens=256,
31
  do_sample=True,
32
+ top_p=float(top_p),
33
+ temperature=float(temperature),
34
+ num_return_sequences=1,
35
+ no_repeat_ngram_size=3 # avoid repeats like 'inequality, inequality'
36
  )
37
 
38
  paraphrased = tokenizer.decode(outputs[0], skip_special_tokens=True)
39
  paraphrased_paragraphs.append(paraphrased)
40
 
 
41
  return "\n\n".join(paraphrased_paragraphs)
42
 
43
+
44
  # Gradio UI
45
  iface = gr.Interface(
46
  fn=paraphrase_t5,