Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,76 +1,36 @@
|
|
| 1 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 2 |
import torch, gradio as gr
|
| 3 |
-
import re
|
| 4 |
|
| 5 |
-
|
| 6 |
-
model_name = "Ateeqq/Text-Rewriter-Paraphraser"
|
| 7 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 8 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
| 9 |
|
| 10 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 11 |
-
model.to(device)
|
| 12 |
model.eval()
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
sentences = re.split(r'(?<=[.!?])\s+', text.strip())
|
| 17 |
-
return [s for s in sentences if s]
|
| 18 |
-
|
| 19 |
-
def clean_sentence(sent):
    """Normalize whitespace in *sent* and make sure it ends with punctuation.

    Runs of whitespace (spaces, tabs, newlines) are collapsed to a single
    space and leading/trailing whitespace is removed. If the result does not
    already end with ``.``, ``!`` or ``?``, a ``.`` is appended.

    Args:
        sent: The sentence text to tidy up.

    Returns:
        The cleaned sentence, or an empty string when *sent* contains no
        non-whitespace characters.
    """
    # str.split() with no arguments splits on any whitespace run and drops
    # leading/trailing whitespace, so joining with one space collapses and
    # strips in a single pass without needing the ``re`` module.
    collapsed = " ".join(sent.split())
    if not collapsed:
        # Nothing to punctuate; avoid emitting a stray "." for blank input.
        return ""
    if collapsed.endswith((".", "!", "?")):
        return collapsed
    return collapsed + "."
|
| 24 |
-
|
| 25 |
-
# --- Main function (Batch Processing) ---
|
| 26 |
-
def paraphrase_fn(text, num_return_sequences=1, temperature=0.8, top_p=0.9):
|
| 27 |
-
if not text.strip():
|
| 28 |
-
return "⚠️ Please enter some text"
|
| 29 |
-
|
| 30 |
-
sentences = split_sentences(text)
|
| 31 |
-
prompts = ["paraphraser: " + s for s in sentences]
|
| 32 |
-
|
| 33 |
-
inputs = tokenizer(
|
| 34 |
-
prompts,
|
| 35 |
-
return_tensors="pt",
|
| 36 |
-
truncation=True,
|
| 37 |
-
padding=True
|
| 38 |
-
).to(device)
|
| 39 |
-
|
| 40 |
outputs = model.generate(
|
| 41 |
**inputs,
|
| 42 |
-
max_new_tokens=
|
| 43 |
-
|
|
|
|
|
|
|
| 44 |
do_sample=True,
|
| 45 |
-
top_p=top_p,
|
| 46 |
-
temperature=temperature,
|
| 47 |
-
no_repeat_ngram_size=2,
|
| 48 |
-
early_stopping=True
|
| 49 |
)
|
|
|
|
| 50 |
|
| 51 |
-
decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
| 52 |
-
|
| 53 |
-
# Since we may get multiple return sequences, group by sentence
|
| 54 |
-
paraphrased_sentences = []
|
| 55 |
-
step = int(num_return_sequences)
|
| 56 |
-
for i in range(0, len(decoded), step):
|
| 57 |
-
first_variant = clean_sentence(decoded[i]) # take the first variant only
|
| 58 |
-
paraphrased_sentences.append(first_variant)
|
| 59 |
-
|
| 60 |
-
return " ".join(paraphrased_sentences)
|
| 61 |
-
|
| 62 |
-
# --- Gradio Interface ---
|
| 63 |
iface = gr.Interface(
|
| 64 |
-
fn=
|
| 65 |
inputs=[
|
| 66 |
-
gr.Textbox(lines=8, placeholder="Paste text here..."),
|
| 67 |
-
gr.Slider(
|
| 68 |
-
gr.Slider(0.
|
| 69 |
-
gr.Slider(0.6, 1.0, step=0.05, value=0.9, label="Top-p"),
|
| 70 |
],
|
| 71 |
-
outputs=gr.Textbox(label="Paraphrased Text"),
|
| 72 |
-
title="
|
| 73 |
-
description="
|
| 74 |
)
|
| 75 |
|
| 76 |
iface.launch()
|
|
|
|
| 1 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 2 |
import torch, gradio as gr
|
|
|
|
| 3 |
|
| 4 |
+
model_name = "cPower/dipper-paraphraser-xxl-tokeninc"
|
|
|
|
| 5 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 6 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
| 7 |
|
| 8 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 9 |
+
model = model.to(device)
|
| 10 |
model.eval()
|
| 11 |
|
| 12 |
+
def _dipper_control_code(amount):
    """Convert a 0.0-1.0 strength into a DIPPER control code (100 down to 0).

    The strength is clamped to [0.0, 1.0] and snapped to the nearest of the
    six levels 0, 20, 40, 60, 80, 100 (exact ties follow Python's ``round``,
    i.e. round-half-to-even). The returned code is ``100 - level``, so a
    higher strength gives a lower code.
    """
    clamped = min(max(float(amount), 0.0), 1.0)
    level = 20 * round(clamped * 5)
    return 100 - level


def paraphrase_dipper(text, diversity=0.5, reordering=0.5):
    """Paraphrase *text* with the module-level seq2seq model.

    The two strengths are passed to the model as control codes at the start
    of the prompt rather than as ``generate()`` keyword arguments:
    ``num_reorder`` is not a generation parameter, and ``diversity_penalty``
    only applies to group beam search, so neither can be combined with the
    sampling call used here.

    NOTE(review): the prompt layout
    ``lexical = <code>, order = <code> <sent> ... </sent>`` is the one
    documented for the upstream DIPPER checkpoints; confirm that the
    checkpoint loaded in ``model_name`` was trained with the same format.

    Args:
        text: The passage to paraphrase.
        diversity: Lexical-diversity strength in [0.0, 1.0].
        reordering: Content-reordering strength in [0.0, 1.0].

    Returns:
        The decoded paraphrase, or an empty string when *text* is empty or
        contains only whitespace (the model is not called in that case).
    """
    if not text or not text.strip():
        return ""

    lex_code = _dipper_control_code(diversity)
    order_code = _dipper_control_code(reordering)
    # Collapse newlines/tabs so the passage sits on one line between the
    # sentence markers.
    passage = " ".join(text.split())
    prompt = f"lexical = {lex_code}, order = {order_code} <sent> {passage} </sent>"

    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
        truncation=True,
        padding=True,
    ).to(device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        top_p=1.0,
    )
    # Only one prompt was submitted, so the first sequence is the result.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
iface = gr.Interface(
|
| 25 |
+
fn=paraphrase_dipper,
|
| 26 |
inputs=[
|
| 27 |
+
gr.Textbox(lines=8, placeholder="Paste full text here..."),
|
| 28 |
+
gr.Slider(0.0, 1.0, step=0.1, value=0.5, label="Lexical Diversity"),
|
| 29 |
+
gr.Slider(0.0, 1.0, step=0.1, value=0.5, label="Reordering Amount")
|
|
|
|
| 30 |
],
|
| 31 |
+
outputs=gr.Textbox(label="Paraphrased & Humanized Text"),
|
| 32 |
+
title="DIPPER Paraphraser (AI-Detector Evading)",
|
| 33 |
+
description="Paraphrase full text with diversity and reordering control to reduce AI detection."
|
| 34 |
)
|
| 35 |
|
| 36 |
iface.launch()
|