Spaces:

VictorM-Coder
/

Writenixhumanizer

Sleeping

App Files Files Community

VictorM-Coder committed on Sep 10, 2025

Commit

d5b9186

verified ·

1 Parent(s): aef9a98

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -68

app.py CHANGED Viewed

@@ -1,29 +1,22 @@
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
-import torch, gradio as gr, re
 # ------------------------
-# Load Models
 # ------------------------
-# Stage 1: Paraphraser (Parrot)
-paraphrase_model_name = "prithivida/parrot_paraphraser_on_T5"
-paraphrase_tokenizer = AutoTokenizer.from_pretrained(paraphrase_model_name)
-paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(paraphrase_model_name)
-# Stage 2: Lightweight Expander (flan-t5-small)
-expander = pipeline(
-    "text2text-generation",
-    model="google/flan-t5-small",
-    device=0 if torch.cuda.is_available() else -1
-)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-paraphrase_model = paraphrase_model.to(device)
-paraphrase_model.eval()
 # ------------------------
 # Helpers
 # ------------------------
 def split_sentences(text):
     sentences = re.split(r'(?<=[.!?])\s+', text.strip())
     return [s for s in sentences if s]
@@ -33,82 +26,60 @@ def clean_sentence(sent):
         sent += "."
     return sent
 # ------------------------
-# Stage 1: Paraphrase
 # ------------------------
-def paraphrase_fn(text, num_return_sequences=1, temperature=1.2, top_p=0.92):
     sentences = split_sentences(text)
-    all_outputs = []
     for sent in sentences:
         input_text = "paraphrase: " + sent + " </s>"
-        inputs = paraphrase_tokenizer([input_text], return_tensors="pt", truncation=True, padding=True).to(device)
-        outputs = paraphrase_model.generate(
             **inputs,
-            max_new_tokens=64,
-            num_return_sequences=int(num_return_sequences),
             do_sample=True,
             top_p=float(top_p),
-            temperature=float(temperature),
-            min_length=10,
-            length_penalty=1.0
         )
-        decoded = paraphrase_tokenizer.batch_decode(outputs, skip_special_tokens=True)
-        seen, unique = set(), []
-        for d in decoded:
-            d = clean_sentence(d)
-            if d not in seen:
-                unique.append(d)
-                seen.add(d)
-        if unique:
-            all_outputs.append(unique[0])
-    return " ".join(all_outputs).strip()
-# ------------------------
-# Stage 2: Light Expansion
-# ------------------------
-def expand_text(text, temperature=0.7, top_p=0.9):
-    expanded = expander(
-        f"Lightly enhance this text by adding small natural words, transitions, or adjectives (like 'actually', 'quite', 'additionally', 'really'). Do NOT rewrite completely:\n{text}",
-        max_new_tokens=80,
-        temperature=float(temperature),
-        top_p=float(top_p)
-    )[0]['generated_text']
-    return expanded.strip()
-# ------------------------
-# Final Pipeline
-# ------------------------
-def humanize_pipeline(text, variants=1, temperature=1.2, top_p=0.92):
-    if not text.strip():
-        return "⚠️ Please enter some text"
-    # Stage 1: Paraphrase
-    base = paraphrase_fn(text, num_return_sequences=variants, temperature=temperature, top_p=top_p)
-    # Stage 2: Light Expansion
-    expanded = expand_text(base, temperature=temperature, top_p=top_p)
-    return expanded
 # ------------------------
 # Gradio Interface
 # ------------------------
 iface = gr.Interface(
-    fn=humanize_pipeline,
     inputs=[
         gr.Textbox(lines=8, placeholder="Paste text here..."),
-        gr.Slider(1, 3, step=1, value=1, label="Variants"),
-        gr.Slider(0.5, 2.0, step=0.1, value=1.2, label="Temperature"),
         gr.Slider(0.6, 1.0, step=0.01, value=0.92, label="Top-p"),
     ],
     outputs=gr.Textbox(label="Final Humanized Text"),
-    title="📝 Writenix Humanizer v3 (Light Mode)",
-    description="Two-stage pipeline: Paraphrase + Subtle Expansion. Adds natural filler words, transitions, and adjectives instead of rewriting everything."
 )
 iface.launch()

+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import torch, gradio as gr, re, random
 # ------------------------
+# Load Model (Parrot T5)
 # ------------------------
+model_name = "prithivida/parrot_paraphraser_on_T5"
+# The same checkpoint name is used to load both the tokenizer and the seq2seq model.
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+# Run on the GPU when torch reports CUDA is available, otherwise on the CPU.
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = model.to(device)
+# Switch to evaluation mode; the code shown here only calls generate() on this model.
+model.eval()
 # ------------------------
 # Helpers
 # ------------------------
 def split_sentences(text):
+    # Strip the text, then split on whitespace that directly follows '.', '!' or '?'.
+    # The lookbehind keeps the punctuation attached to its sentence; empty pieces are dropped.
     sentences = re.split(r'(?<=[.!?])\s+', text.strip())
     return [s for s in sentences if s]
         sent += "."
     return sent
+FILLERS = ["actually", "indeed", "quite", "essentially", "additionally", "remarkably"]
+def add_fillers(sentence):
+    # Insert one randomly chosen word from FILLERS into `sentence` and return the result.
+    # The sentence is split on whitespace and re-joined with single spaces, so runs of
+    # whitespace are collapsed even when no filler is added.
+    words = sentence.split()
+    if len(words) > 6:  # only add if long enough
+        # randint is inclusive at both ends: the filler goes in at index 2 .. min(len-2, 8),
+        # so at least two original words come before it and at least two come after it.
+        insert_pos = random.randint(2, min(len(words)-2, 8))
+        filler = random.choice(FILLERS)
+        words.insert(insert_pos, filler)
+    return " ".join(words)
 # ------------------------
+# Main Humanizer
 # ------------------------
+def humanize_text(text, temperature=1.0, top_p=0.92):
+    # Paraphrase `text` one sentence at a time and return the rewritten sentences
+    # joined by single spaces.
+    # temperature and top_p are cast to float and passed to model.generate as sampling settings.
+    # Blank or whitespace-only input returns the warning string below without calling the model.
+    if not text.strip():
+        return "⚠️ Please enter some text"
     sentences = split_sentences(text)
+    paraphrased_sentences = []
     for sent in sentences:
+        # Prompt layout: the "paraphrase: " task prefix, the sentence, then a literal " </s>" suffix.
         input_text = "paraphrase: " + sent + " </s>"
+        inputs = tokenizer([input_text], return_tensors="pt", truncation=True, padding=True).to(device)
+        outputs = model.generate(
             **inputs,
+            max_new_tokens=80,
             do_sample=True,
             top_p=float(top_p),
+            temperature=float(temperature)
         )
+        # Only the first generated sequence is decoded and used.
+        decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # NOTE(review): clean_sentence is only partly visible in this diff; its visible tail
+        # appends "." under some condition and returns the sentence. Confirm against the full file.
+        decoded = clean_sentence(decoded)
+        # Add filler word for naturalness
+        final_sentence = add_fillers(decoded)
+        paraphrased_sentences.append(final_sentence)
+    return " ".join(paraphrased_sentences)
 # ------------------------
 # Gradio Interface
 # ------------------------
 iface = gr.Interface(
+    # The inputs below are listed in the same order as humanize_text's parameters
+    # (text, temperature, top_p); the slider defaults 1.0 and 0.92 match the function's defaults.
+    fn=humanize_text,
     inputs=[
         gr.Textbox(lines=8, placeholder="Paste text here..."),
+        gr.Slider(0.5, 2.0, step=0.1, value=1.0, label="Temperature"),
         gr.Slider(0.6, 1.0, step=0.01, value=0.92, label="Top-p"),
     ],
     outputs=gr.Textbox(label="Final Humanized Text"),
+    title="⚡ Writenix Fast Humanizer",
+    description="Fast pipeline: Parrot paraphraser + smart filler injection. Keeps full text, avoids truncation, adds subtle human touch."
 )
+# launch() is called at module level; the code shown has no __main__ guard around it.
 iface.launch()