Spaces:

VictorM-Coder
/

Writenixhumanizer

Sleeping

App Files Files Community

VictorM-Coder committed on Sep 10, 2025

Commit

e4ac1f5

verified ·

1 Parent(s): c95d469

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -21

app.py CHANGED Viewed

@@ -11,6 +11,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 model.eval()
 def split_sentences(text):
     sentences = re.split(r'(?<=[.!?])\s+', text.strip())
     return [s for s in sentences if s]
@@ -21,33 +22,40 @@ def clean_sentence(sent):
         sent += "."
     return sent
 def paraphrase_fn(text, num_return_sequences=1, temperature=0.8, top_p=0.9):
     if not text.strip():
         return "⚠️ Please enter some text"
-    num_return_sequences = int(num_return_sequences)
     sentences = split_sentences(text)
-    paraphrased_sentences = []
-    for sent in sentences:
-        prompt = "paraphraser: " + sent
-        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True).to(device)
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=128,
-            num_return_sequences=num_return_sequences,
-            do_sample=True,
-            top_p=top_p,
-            temperature=temperature,
-            no_repeat_ngram_size=2,
-            early_stopping=True
-        )
-        # Take the first unique paraphrase
-        decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
-        clean = [clean_sentence(d) for d in decoded]
-        paraphrased_sentences.append(clean[0])
     return " ".join(paraphrased_sentences)
@@ -61,8 +69,8 @@ iface = gr.Interface(
         gr.Slider(0.6, 1.0, step=0.05, value=0.9, label="Top-p"),
     ],
     outputs=gr.Textbox(label="Paraphrased Text"),
-    title="Text Rewriter Paraphraser (T5-Base)",
-    description="High-quality model fine-tuned on 430K examples for natural, non-AI-detectable paraphrasing."
 )
 iface.launch()

 # Move the model onto the selected device and put it in evaluation mode
 # before any text is generated.
 model.to(device)
 model.eval()
+# --- Helpers ---
 def split_sentences(text):
     """Split ``text`` into sentences.

     The split happens on runs of whitespace that directly follow '.', '!'
     or '?'. Surrounding whitespace is stripped first and empty pieces are
     discarded, so empty input yields an empty list.
     """
     pieces = re.split(r'(?<=[.!?])\s+', text.strip())
     return list(filter(None, pieces))
         sent += "."
     return sent
+# --- Main function (Batch Processing) ---
 def paraphrase_fn(text, num_return_sequences=1, temperature=0.8, top_p=0.9):
     """Paraphrase ``text`` sentence by sentence using one batched generate call.

     Args:
         text: Passage to rewrite. ``None``, empty or whitespace-only input
             produces a warning string instead of a paraphrase.
         num_return_sequences: Variants sampled per sentence. Coerced to int
             and clamped to at least 1; only the first variant of each
             sentence is kept in the result.
         temperature: Sampling temperature forwarded to ``model.generate``.
         top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

     Returns:
         The cleaned paraphrases joined by single spaces, or a warning
         message when there is nothing to paraphrase.
     """
     # Guard against None as well as blank strings before touching the model.
     if not text or not text.strip():
         return "⚠️ Please enter some text"
     # Clamp so a zero/negative value can never become a zero slice step below.
     variants_per_sentence = max(1, int(num_return_sequences))
     sentences = split_sentences(text)
     prompts = ["paraphraser: " + s for s in sentences]
     inputs = tokenizer(
         prompts,
         return_tensors="pt",
         truncation=True,
         padding=True
     ).to(device)
     outputs = model.generate(
         **inputs,
         max_new_tokens=64,                # smaller for speed
         num_return_sequences=variants_per_sentence,
         do_sample=True,
         top_p=top_p,
         temperature=temperature,
         no_repeat_ngram_size=2,
         early_stopping=True
     )
     decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
     # Outputs are assumed to be grouped per prompt (variants_per_sentence
     # consecutive rows each), so a stepped slice picks the first variant of
     # every sentence. NOTE(review): confirm against the generate() docs.
     first_variants = decoded[::variants_per_sentence]
     return " ".join(clean_sentence(variant) for variant in first_variants)
         gr.Slider(0.6, 1.0, step=0.05, value=0.9, label="Top-p"),
     ],
     outputs=gr.Textbox(label="Paraphrased Text"),
+    title="Text Rewriter Paraphraser (Batch Optimized)",
+    description="Fast paraphrasing powered by T5-base. Now optimized with batch processing 🚀"
 )
 iface.launch()