VictorM-Coder committed on
Commit
c63aa57
·
verified ·
1 Parent(s): dc81ef5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -47
app.py CHANGED
@@ -1,68 +1,114 @@
1
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
- import torch, gradio as gr
3
- import re
4
 
5
- # Load Model (lighter + faster)
6
- model_name = "humarin/chatgpt_paraphraser_on_T5_base"
7
- tokenizer = AutoTokenizer.from_pretrained(model_name)
8
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
 
 
 
 
 
 
 
 
 
9
 
10
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
11
- model = model.to(device)
12
- model.eval()
13
 
14
- # --- Helpers ---
15
- def split_sentences(paragraph):
16
- # Split into sentences based on punctuation + space
17
- sentences = re.split(r'(?<=[.!?])\s+', paragraph.strip())
 
18
  return [s for s in sentences if s]
19
 
20
- # --- Main Paraphrasing Function ---
21
- def paraphrase_t5(text, temperature=0.9, top_p=0.92):
22
- if not text.strip():
23
- return "⚠️ Please enter some text"
 
 
 
 
 
 
 
 
24
 
25
- paragraphs = [p.strip() for p in text.split("\n") if p.strip()]
26
- paraphrased_paragraphs = []
 
27
 
28
- for p in paragraphs:
29
- sentences = split_sentences(p)
30
- paraphrased_sentences = []
 
 
 
 
 
 
 
 
31
 
32
- for s in sentences:
33
- prompt = f"Paraphrase this in a more natural, human style while keeping meaning:\n{s}"
 
 
 
 
34
 
35
- inputs = tokenizer([prompt], return_tensors="pt", truncation=True, padding=True).to(device)
 
36
 
37
- outputs = model.generate(
38
- **inputs,
39
- max_new_tokens=512, # allow long outputs
40
- do_sample=True,
41
- top_p=float(top_p),
42
- temperature=float(temperature),
43
- num_return_sequences=1,
44
- no_repeat_ngram_size=3
45
- )
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- paraphrased = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
48
- paraphrased_sentences.append(paraphrased)
49
 
50
- # Rejoin sentences into a paragraph
51
- paraphrased_paragraphs.append(" ".join(paraphrased_sentences))
52
 
53
- return "\n\n".join(paraphrased_paragraphs)
54
 
55
- # --- Gradio UI ---
 
 
56
  iface = gr.Interface(
57
- fn=paraphrase_t5,
58
  inputs=[
59
- gr.Textbox(lines=8, placeholder="Paste full text here..."),
60
- gr.Slider(0.5, 1.5, step=0.1, value=0.9, label="Temperature"),
61
- gr.Slider(0.6, 1.0, step=0.02, value=0.92, label="Top-p")
 
62
  ],
63
- outputs=gr.Textbox(label="Paraphrased & Humanized Text"),
64
- title="T5-Base Paraphraser (Humanizer)",
65
- description="Fast, high-quality paraphrasing on T5-base, tuned for human-like rewrites."
66
  )
67
 
68
  iface.launch()
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
2
+ import torch, gradio as gr, re
 
3
 
4
# ------------------------
# Load Models
# ------------------------
# Resolve the target device once; both stages derive from this single check
# (previously torch.cuda.is_available() was queried twice and `device` was
# defined only after the pipeline that needed the same decision).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Stage 1: Paraphraser (sentence-level rewriting with Parrot-T5).
paraphrase_model_name = "prithivida/parrot_paraphraser_on_T5"
paraphrase_tokenizer = AutoTokenizer.from_pretrained(paraphrase_model_name)
paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(paraphrase_model_name)
paraphrase_model = paraphrase_model.to(device)
paraphrase_model.eval()  # inference only: disables dropout

# Stage 2: Expander (Flan-T5-Large) via the high-level pipeline API.
# pipeline() takes a device *index* (-1 = CPU, 0 = first GPU), so translate
# the torch.device resolved above instead of re-probing CUDA.
expander = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
    device=0 if device.type == "cuda" else -1,
)
22
 
23
+ # ------------------------
24
+ # Helpers
25
+ # ------------------------
26
def split_sentences(text):
    """Break *text* into sentences, splitting after '.', '!' or '?' plus whitespace."""
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    # filter(None, ...) drops the empty strings re.split can produce.
    return list(filter(None, pieces))
29
 
30
def clean_sentence(sent):
    """Collapse whitespace runs and make sure the sentence ends with punctuation."""
    cleaned = re.sub(r'\s+', ' ', sent).strip()
    # Append a period only when no terminal punctuation is already present.
    return cleaned if cleaned.endswith(('.', '!', '?')) else cleaned + "."
35
+
36
# ------------------------
# Stage 1: Paraphrase
# ------------------------
def paraphrase_fn(text, num_return_sequences=1, temperature=1.2, top_p=0.92):
    """Paraphrase *text* sentence by sentence with the Parrot T5 model.

    Each sentence is rewritten independently; the first unique candidate per
    sentence is kept and the results are rejoined into a single string.

    Args:
        text: Input passage (one or more sentences).
        num_return_sequences: Candidates sampled per sentence. Only the first
            unique candidate is used; extras just widen the sampling pool.
        temperature: Sampling temperature forwarded to ``generate()``.
        top_p: Nucleus-sampling cutoff forwarded to ``generate()``.

    Returns:
        The paraphrased passage as one space-joined string ("" for no input).
    """
    sentences = split_sentences(text)
    all_outputs = []

    for sent in sentences:
        # Parrot expects the "paraphrase: " task prefix. The tokenizer appends
        # the EOS token itself, so the literal " </s>" previously embedded in
        # the raw text was redundant (and relies on deprecated in-text
        # special-token parsing).
        input_text = "paraphrase: " + sent
        inputs = paraphrase_tokenizer(
            [input_text], return_tensors="pt", truncation=True, padding=True
        ).to(device)

        # inference_mode avoids building autograd state during generation.
        with torch.inference_mode():
            outputs = paraphrase_model.generate(
                **inputs,
                max_new_tokens=128,
                num_return_sequences=int(num_return_sequences),
                do_sample=True,
                top_p=float(top_p),
                temperature=float(temperature),
                min_length=20,
                length_penalty=1.2,
            )
        decoded = paraphrase_tokenizer.batch_decode(outputs, skip_special_tokens=True)

        # Deduplicate candidates while preserving order, then keep the first.
        seen, unique = set(), []
        for cand in decoded:
            cand = clean_sentence(cand)
            if cand not in seen:
                unique.append(cand)
                seen.add(cand)

        if unique:
            all_outputs.append(unique[0])

    return " ".join(all_outputs).strip()
70
+
71
# ------------------------
# Stage 2: Expansion
# ------------------------
def expand_text(text, temperature=0.9, top_p=0.95):
    """Expand *text* into a longer, more natural passage with Flan-T5.

    Args:
        text: The paraphrased passage to enrich.
        temperature: Sampling temperature for the expander.
        top_p: Nucleus-sampling cutoff for the expander.

    Returns:
        The expanded text produced by the generation pipeline.
    """
    result = expander(
        f"Expand and make this text more detailed, natural, and human-like:\n{text}",
        max_new_tokens=250,
        # BUG FIX: without do_sample=True the pipeline decodes greedily and
        # silently ignores temperature/top_p, so the UI sliders had no effect.
        do_sample=True,
        temperature=float(temperature),
        top_p=float(top_p),
    )
    return result[0]['generated_text']
82
+
83
# ------------------------
# Final Pipeline
# ------------------------
def humanize_pipeline(text, variants=1, temperature=1.2, top_p=0.92):
    """Run the full two-stage rewrite: paraphrase, then expand and smooth."""
    # Guard clause: refuse blank input before touching either model.
    if not text.strip():
        return "⚠️ Please enter some text"

    # Stage 1: sentence-level paraphrase.
    paraphrased = paraphrase_fn(
        text,
        num_return_sequences=variants,
        temperature=temperature,
        top_p=top_p,
    )

    # Stage 2: expand and smooth the paraphrase into the final output.
    return expand_text(paraphrased, temperature=temperature, top_p=top_p)
97
 
98
# ------------------------
# Gradio Interface
# ------------------------
iface = gr.Interface(
    fn=humanize_pipeline,
    inputs=[
        gr.Textbox(lines=8, placeholder="Paste text here..."),
        gr.Slider(1, 3, step=1, value=1, label="Variants"),
        gr.Slider(0.5, 2.0, step=0.1, value=1.2, label="Temperature"),
        gr.Slider(0.6, 1.0, step=0.01, value=0.92, label="Top-p"),
    ],
    outputs=gr.Textbox(label="Final Humanized Text"),
    title="📝 Writenix Humanizer v2",
    description="Two-stage pipeline: Paraphrase + Expand. Produces longer, more natural, human-like rewrites that are harder to detect.",
)

# Launch only when executed as a script, so the module stays importable
# (e.g. for tests or reuse) without starting a web server as a side effect.
if __name__ == "__main__":
    iface.launch()