VictorM-Coder committed on
Commit
fedaceb
·
verified ·
1 Parent(s): c417d0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -44
app.py CHANGED
@@ -1,49 +1,88 @@
1
- import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
3
  import re
4
 
5
- # Load model
6
- tokenizer = AutoTokenizer.from_pretrained("Vamsi/T5_Paraphrase_Paws")
7
- model = AutoModelForSeq2SeqLM.from_pretrained("Vamsi/T5_Paraphrase_Paws")
8
-
9
- # Function to paraphrase a single chunk
10
- def paraphrase_text(text):
11
- input_text = f"paraphrase: {text} </s>"
12
- input_ids = tokenizer.encode(input_text, return_tensors="pt", truncation=True)
13
- output_ids = model.generate(
14
- input_ids,
15
- max_length=256,
16
- do_sample=True,
17
- top_k=120,
18
- top_p=0.95,
19
- temperature=1.3
20
- )
21
- return tokenizer.decode(output_ids[0], skip_special_tokens=True)
22
-
23
- # Split text into chunks (4 sentences each)
24
- def chunk_text(text, max_sentences=4):
25
- sentences = re.split(r'(?<=[.!?]) +', text.strip())
26
- return [' '.join(sentences[i:i+max_sentences]) for i in range(0, len(sentences), max_sentences)]
27
-
28
- # Paraphrase the full text
29
- def full_article_paraphrase(text):
30
- chunks = chunk_text(text)
31
- return "\n\n".join(paraphrase_text(chunk.strip()) for chunk in chunks if chunk.strip())
32
-
33
- # Gradio pipeline
34
- def paraphrase_pipeline(input_text):
35
- if not input_text or len(input_text.strip()) < 10:
36
- return "Please enter valid text."
37
- return full_article_paraphrase(input_text)
38
-
39
- # Gradio interface
40
- demo = gr.Interface(
41
- fn=paraphrase_pipeline,
42
- inputs=gr.Textbox(label="Paste Text Here", lines=20, placeholder="Enter your text..."),
43
- outputs=gr.Textbox(label="Paraphrased Text"),
44
- title="Smart Paraphraser",
45
- description="Paste your text and get paraphrased output instantly."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  )
47
 
48
- if __name__ == "__main__":
49
- demo.launch()
 
 
1
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
+ import torch, gradio as gr
3
  import re
4
 
5
# --- Load Model ---
model_name = "prithivida/parrot_paraphraser_on_T5"

# Tokenizer and seq2seq weights are fetched from the Hugging Face hub once
# at startup and then reused for every request.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Prefer the GPU when one is present; this app only runs inference,
# so the model is switched to eval mode.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)
model.eval()
13
+
14
+ # --- Helpers ---
15
def split_paragraphs(text):
    """Return the non-empty, whitespace-trimmed paragraphs of *text*.

    A paragraph is anything separated by a single newline; blank lines
    are dropped entirely.
    """
    trimmed = (line.strip() for line in text.split("\n"))
    return [line for line in trimmed if line]
19
+
20
def split_sentences(text):
    """Split a paragraph into sentences on terminal punctuation.

    Splits after '.', '!' or '?' followed by whitespace, keeping the
    punctuation attached to its sentence; empty pieces are discarded.
    """
    pieces = re.split(r'(?<=[.!?])\s+', text.strip())
    return list(filter(None, pieces))
24
+
25
def clean_sentence(sent):
    """Collapse internal whitespace and guarantee terminal punctuation.

    Runs of whitespace become single spaces, surrounding whitespace is
    removed, and a trailing '.' is appended unless the sentence already
    ends with '.', '!' or '?'.
    """
    normalized = " ".join(sent.split())
    if normalized.endswith(('.', '!', '?')):
        return normalized
    return normalized + "."
31
+
32
+ # --- Main function ---
33
def paraphrase_fn(text, num_return_sequences=1, temperature=1.2, top_p=0.92):
    """Paraphrase *text* sentence by sentence, preserving paragraph breaks.

    Each paragraph is split into sentences; every sentence is sent through
    the T5 paraphraser with sampling, and the first distinct candidate is
    kept. Paragraphs are re-joined with blank lines so the input layout
    survives.

    Args:
        text: the raw input text to paraphrase.
        num_return_sequences: how many candidates to sample per sentence.
        temperature: sampling temperature passed to `generate`.
        top_p: nucleus-sampling cutoff passed to `generate`.

    Returns:
        The paraphrased text as a single string, or a short prompt message
        when the input is blank. Output is stochastic (do_sample=True).
    """
    if not text.strip():
        return "Enter some text"

    n_variants = int(num_return_sequences)
    rewritten_paragraphs = []

    for paragraph in split_paragraphs(text):
        rewritten_sentences = []

        for sentence in split_sentences(paragraph):
            # Parrot-style prompt; the literal </s> marker matches the
            # model's expected input format.
            prompt = "paraphrase: " + sentence + " </s>"
            encoded = tokenizer([prompt], return_tensors="pt", truncation=True, padding=True).to(device)

            generated = model.generate(
                **encoded,
                max_new_tokens=128,
                num_return_sequences=n_variants,
                do_sample=True,
                top_p=float(top_p),
                temperature=float(temperature),
            )
            candidates = tokenizer.batch_decode(generated, skip_special_tokens=True)

            # De-duplicate candidates in generation order after cleaning;
            # only the first distinct one ends up in the output.
            seen = set()
            ordered_unique = []
            for cand in map(clean_sentence, candidates):
                if cand in seen:
                    continue
                seen.add(cand)
                ordered_unique.append(cand)

            rewritten_sentences.append(ordered_unique[0])

        # Re-assemble the paragraph from its paraphrased sentences.
        rewritten_paragraphs.append(" ".join(rewritten_sentences))

    # Double line breaks restore the original paragraph structure.
    return "\n\n".join(rewritten_paragraphs)
73
+
74
# --- Gradio Interface ---
# UI definition: one textbox in, one textbox out, plus sampling controls.
# The same endpoint doubles as an API for programmatic callers.
iface = gr.Interface(
    fn=paraphrase_fn,
    inputs=[
        gr.Textbox(lines=12, placeholder="Paste text here..."),
        gr.Slider(1, 3, step=1, value=1, label="Variants"),
        gr.Slider(0.5, 2.0, step=0.1, value=1.2, label="Temperature"),
        gr.Slider(0.6, 1.0, step=0.01, value=0.92, label="Top-p"),
    ],
    outputs=gr.Textbox(label="Output"),
    title="📝 Writenix API",
    description="This Space provides a UI *and* an API for paraphrasing text while preserving paragraphs."
)

# Guard the launch so importing this module (e.g. from tests or another
# script) does not start a server; running the file directly behaves as
# before. This restores the entry-point guard the earlier version had.
if __name__ == "__main__":
    iface.launch()