Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,6 +11,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
| 11 |
model.to(device)
|
| 12 |
model.eval()
|
| 13 |
|
|
|
|
| 14 |
def split_sentences(text):
    """Split *text* into sentences.

    A boundary is any run of whitespace that directly follows ``.``, ``!``
    or ``?``; the punctuation stays attached to the sentence it ends.

    Args:
        text: The raw input string. ``None`` or an empty string is accepted.

    Returns:
        A list of the non-empty sentence strings, in order. Empty when
        *text* is ``None``, empty, or only whitespace.
    """
    # Guard first: calling .strip() on None would raise AttributeError.
    if not text:
        return []
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    return [piece for piece in pieces if piece]
|
|
@@ -21,33 +22,40 @@ def clean_sentence(sent):
|
|
| 21 |
sent += "."
|
| 22 |
return sent
|
| 23 |
|
|
|
|
| 24 |
def paraphrase_fn(text, num_return_sequences=1, temperature=0.8, top_p=0.9):
|
| 25 |
if not text.strip():
|
| 26 |
return "⚠️ Please enter some text"
|
| 27 |
|
| 28 |
-
num_return_sequences = int(num_return_sequences)
|
| 29 |
sentences = split_sentences(text)
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
-
**inputs,
|
| 38 |
-
max_new_tokens=128,
|
| 39 |
-
num_return_sequences=num_return_sequences,
|
| 40 |
-
do_sample=True,
|
| 41 |
-
top_p=top_p,
|
| 42 |
-
temperature=temperature,
|
| 43 |
-
no_repeat_ngram_size=2,
|
| 44 |
-
early_stopping=True
|
| 45 |
-
)
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
|
| 52 |
return " ".join(paraphrased_sentences)
|
| 53 |
|
|
@@ -61,8 +69,8 @@ iface = gr.Interface(
|
|
| 61 |
gr.Slider(0.6, 1.0, step=0.05, value=0.9, label="Top-p"),
|
| 62 |
],
|
| 63 |
outputs=gr.Textbox(label="Paraphrased Text"),
|
| 64 |
-
title="Text Rewriter Paraphraser (
|
| 65 |
-
description="
|
| 66 |
)
|
| 67 |
|
| 68 |
iface.launch()
|
|
|
|
| 11 |
model.to(device)
|
| 12 |
model.eval()
|
| 13 |
|
| 14 |
+
# --- Helpers ---
|
| 15 |
def split_sentences(text):
    """Split *text* into sentences.

    A boundary is any run of whitespace that directly follows ``.``, ``!``
    or ``?``; the punctuation stays attached to the sentence it ends.

    Args:
        text: The raw input string. ``None`` or an empty string is accepted.

    Returns:
        A list of the non-empty sentence strings, in order. Empty when
        *text* is ``None``, empty, or only whitespace.
    """
    # Guard first: calling .strip() on None would raise AttributeError.
    if not text:
        return []
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    return [piece for piece in pieces if piece]
|
|
|
|
| 22 |
sent += "."
|
| 23 |
return sent
|
| 24 |
|
| 25 |
+
# --- Main function (Batch Processing) ---
|
| 26 |
def paraphrase_fn(text, num_return_sequences=1, temperature=0.8, top_p=0.9):
    """Paraphrase *text* sentence by sentence using one batched generate call.

    The input is split with ``split_sentences``, each sentence is prefixed
    with ``"paraphraser: "``, and all prompts are tokenized and sampled
    together. Only the first sampled variant of each sentence is kept; it
    is passed through ``clean_sentence`` and the results are joined with
    single spaces.

    Args:
        text: The text to paraphrase. ``None``, empty, or whitespace-only
            input yields a warning string instead of a paraphrase.
        num_return_sequences: How many variants to sample per sentence.
            Converted with ``int()`` and clamped to at least 1.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        The paraphrased text, or ``"⚠️ Please enter some text"`` when there
        is nothing to paraphrase.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text"

    # The value doubles as the stride of range() below, which raises
    # ValueError on 0; generate() also needs a positive count.
    variants_per_sentence = max(1, int(num_return_sequences))

    sentences = split_sentences(text)
    prompts = ["paraphraser: " + s for s in sentences]

    inputs = tokenizer(
        prompts,
        return_tensors="pt",
        truncation=True,
        padding=True,
    ).to(device)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=64,  # smaller for speed
            num_return_sequences=variants_per_sentence,
            do_sample=True,
            top_p=top_p,
            temperature=temperature,
            no_repeat_ngram_size=2,
            # early_stopping is a beam-search option; it has no effect when
            # sampling with a single beam, so it is not passed here.
        )

    decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # This relies on generate() returning each prompt's variants
    # consecutively, so every `variants_per_sentence`-th row is the first
    # variant of the next sentence.
    paraphrased_sentences = [
        clean_sentence(decoded[i])
        for i in range(0, len(decoded), variants_per_sentence)
    ]

    return " ".join(paraphrased_sentences)
|
| 61 |
|
|
|
|
| 69 |
gr.Slider(0.6, 1.0, step=0.05, value=0.9, label="Top-p"),
|
| 70 |
],
|
| 71 |
outputs=gr.Textbox(label="Paraphrased Text"),
|
| 72 |
+
title="Text Rewriter Paraphraser (Batch Optimized)",
|
| 73 |
+
description="Fast paraphrasing powered by T5-base. Now optimized with batch processing 🚀"
|
| 74 |
)
|
| 75 |
|
| 76 |
iface.launch()
|