Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,23 +20,27 @@ def paraphrase_t5(text, temperature=0.9, top_p=0.92):
|
|
| 20 |
paraphrased_paragraphs = []
|
| 21 |
|
| 22 |
for p in paragraphs:
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
outputs = model.generate(
|
| 26 |
**inputs,
|
| 27 |
max_new_tokens=256,
|
| 28 |
do_sample=True,
|
| 29 |
-
top_p=float(top_p),
|
| 30 |
-
temperature=float(temperature),
|
| 31 |
-
num_return_sequences=1
|
|
|
|
| 32 |
)
|
| 33 |
|
| 34 |
paraphrased = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 35 |
paraphrased_paragraphs.append(paraphrased)
|
| 36 |
|
| 37 |
-
# Join paraphrased paragraphs with newlines
|
| 38 |
return "\n\n".join(paraphrased_paragraphs)
|
| 39 |
|
|
|
|
| 40 |
# Gradio UI
|
| 41 |
iface = gr.Interface(
|
| 42 |
fn=paraphrase_t5,
|
|
|
|
| 20 |
paraphrased_paragraphs = []
|
| 21 |
|
| 22 |
for p in paragraphs:
|
| 23 |
+
# Stronger paraphrasing prompt
|
| 24 |
+
prompt = f"Paraphrase this in a more natural, human style while keeping meaning:\n{p}"
|
| 25 |
+
|
| 26 |
+
inputs = tokenizer([prompt], return_tensors="pt", truncation=True, padding=True).to(device)
|
| 27 |
|
| 28 |
outputs = model.generate(
|
| 29 |
**inputs,
|
| 30 |
max_new_tokens=256,
|
| 31 |
do_sample=True,
|
| 32 |
+
top_p=float(top_p),
|
| 33 |
+
temperature=float(temperature),
|
| 34 |
+
num_return_sequences=1,
|
| 35 |
+
no_repeat_ngram_size=3 # avoid repeats like 'inequality, inequality'
|
| 36 |
)
|
| 37 |
|
| 38 |
paraphrased = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 39 |
paraphrased_paragraphs.append(paraphrased)
|
| 40 |
|
|
|
|
| 41 |
return "\n\n".join(paraphrased_paragraphs)
|
| 42 |
|
| 43 |
+
|
| 44 |
# Gradio UI
|
| 45 |
iface = gr.Interface(
|
| 46 |
fn=paraphrase_t5,
|