VictorM-Coder committed on
Commit
33b7120
·
verified ·
1 Parent(s): f4119b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -43
app.py CHANGED
@@ -1,5 +1,5 @@
1
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
- import torch, gradio as gr, re, random
3
 
4
  # --- Load Model ---
5
  model_name = "prithivida/parrot_paraphraser_on_T5"
@@ -14,64 +14,52 @@ model.eval()
def split_paragraphs(text):
    """Split *text* into paragraphs, one per non-blank line.

    Every line is stripped of surrounding whitespace; lines that are empty
    after stripping are dropped.

    Args:
        text: Raw input text whose paragraphs are separated by newlines.

    Returns:
        A list of stripped, non-empty paragraph strings in input order.
    """
    # Strip each line once and filter on the result, rather than calling
    # strip() twice per line.
    stripped_lines = (line.strip() for line in text.split("\n"))
    return [line for line in stripped_lines if line]
16
 
# Whitespace that directly follows sentence-ending punctuation.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')


def split_sentences(text):
    """Split *text* into sentences at whitespace following '.', '!' or '?'.

    The terminating punctuation stays attached to its sentence.

    Args:
        text: A paragraph of text.

    Returns:
        A list of sentence strings. Empty or whitespace-only input yields an
        empty list rather than a list holding one empty string.
    """
    return [piece for piece in _SENTENCE_BOUNDARY.split(text.strip()) if piece]
 
19
 
def clean_sentence(sent):
    """Normalize whitespace in *sent* and ensure it ends with punctuation.

    Runs of whitespace are collapsed to a single space and the ends are
    trimmed. If the result does not already end with '.', '!' or '?', a '.'
    is appended.

    Args:
        sent: A single sentence, possibly with irregular whitespace.

    Returns:
        The cleaned sentence, or an empty string when *sent* contains no
        non-whitespace characters.
    """
    normalized = re.sub(r'\s+', ' ', sent).strip()
    if not normalized:
        # Nothing to punctuate; do not emit a lone ".".
        return ""

    # Look past trailing closing quotes/brackets so that text such as
    # 'He said "stop."' is not turned into 'He said "stop.".'.
    core = normalized.rstrip("\"')]}\u201d\u2019")
    if not core.endswith(('.', '!', '?')):
        normalized += "."
    return normalized
25
-
FILLERS = ["in fact", "notably", "interestingly", "remarkably", "as a matter of fact"]


def _decapitalize(sentence):
    """Lower-case the first character unless the leading word must stay capitalized."""
    words = sentence.split(None, 1)
    if not words:
        return sentence
    first_word = words[0].rstrip(".,;:!?")
    keeps_capital = (
        first_word == "I"
        or first_word.startswith(("I'", "I\u2019"))  # I'm, I've, ...
        or (len(first_word) > 1 and first_word.isupper())  # acronyms
    )
    if keeps_capital:
        return sentence
    return sentence[0].lower() + sentence[1:]


def inject_filler(paragraph):
    """Prefix one randomly chosen sentence of *paragraph* with a filler phrase.

    The paragraph is split with ``split_sentences``. When it has more than
    two sentences, one sentence other than the first is picked at random and
    rewritten as "<Filler>, <sentence>", with the sentence's first letter
    lower-cased unless it starts with the pronoun "I" or an all-caps word.
    Shorter paragraphs get no filler.

    Args:
        paragraph: Paragraph text.

    Returns:
        The sentences re-joined with single spaces.
    """
    sentences = split_sentences(paragraph)
    if len(sentences) > 2:
        # Index 0 is excluded so the paragraph never opens with a filler.
        idx = random.randrange(1, len(sentences))
        filler = random.choice(FILLERS).capitalize()
        sentences[idx] = filler + ", " + _decapitalize(sentences[idx])
    return " ".join(sentences)
34
-
35
- # --- Main function ---
def humanize_text(text):
    """Paraphrase *text* sentence by sentence and return the rebuilt text.

    The input is split into paragraphs, each paragraph into sentences, and
    every sentence is sent through ``model.generate`` using beam search
    without sampling. The cleaned sentences are re-joined, one filler phrase
    is injected per paragraph via ``inject_filler``, and the paragraphs are
    joined with blank lines.

    ``tokenizer``, ``model`` and ``device`` are module-level names defined
    outside this block.

    Args:
        text: The user's input text.

    Returns:
        The paraphrased text, or a warning message when *text* is ``None``,
        empty or whitespace-only.
    """
    # Guard against None as well as blank strings.
    if not text or not text.strip():
        return "⚠️ Please enter some text"

    final_paragraphs = []

    for para in split_paragraphs(text):
        out_sentences = []

        for sent in split_sentences(para):
            # NOTE(review): the explicit " </s>" may be redundant if the
            # tokenizer already appends EOS; confirm before removing.
            input_text = "paraphrase: " + sent + " </s>"
            inputs = tokenizer([input_text], return_tensors="pt", truncation=True, padding=True).to(device)

            outputs = model.generate(
                **inputs,
                max_new_tokens=64,
                num_beams=3,       # beam search
                do_sample=False,   # no sampling
            )

            decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
            out_sentences.append(clean_sentence(decoded))

        # Rebuild the paragraph, then add a filler once per paragraph.
        new_para = inject_filler(" ".join(out_sentences))
        final_paragraphs.append(new_para)

    return "\n\n".join(final_paragraphs)
67
 
68
  # --- Gradio Interface ---
# Single-textbox UI around humanize_text: one input box, one output box.
source_box = gr.Textbox(lines=10, placeholder="Paste text here...")
result_box = gr.Textbox(label="Humanized Output")

iface = gr.Interface(
    fn=humanize_text,
    inputs=source_box,
    outputs=result_box,
    title=" Writenix Fast Humanizer",
    description="Stable & fast humanizer: deterministic paraphrasing + light filler once per paragraph.",
)

# Start the app.
iface.launch()
 
1
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
+ import torch, gradio as gr, re
3
 
4
  # --- Load Model ---
5
  model_name = "prithivida/parrot_paraphraser_on_T5"
 
def split_paragraphs(text):
    """Split *text* into paragraphs, one per non-blank line.

    Every line is stripped of surrounding whitespace; lines that are empty
    after stripping are dropped.

    Args:
        text: Raw input text whose paragraphs are separated by newlines.

    Returns:
        A list of stripped, non-empty paragraph strings in input order.
    """
    # Strip each line once and filter on the result, rather than calling
    # strip() twice per line.
    stripped_lines = (line.strip() for line in text.split("\n"))
    return [line for line in stripped_lines if line]
16
 
def clean_text(text):
    """Collapse each run of whitespace in *text* to one space and trim the ends.

    Args:
        text: Text to normalize.

    Returns:
        The normalized string; whitespace-only input yields an empty string.
    """
    return re.sub(r'\s+', ' ', text).strip()
20
 
21
+ # --- Main Humanizer ---
# Beam width used for generation; also the most sequences generate() can return.
_NUM_BEAMS = 4


def humanize_text(text, variants=1, temperature=1.0, top_p=0.92):
    """Paraphrase *text* paragraph by paragraph and return the rebuilt text.

    Each non-blank line of the input is treated as one paragraph and sent
    through ``model.generate`` with sampling and beam search enabled. The
    decoded candidates are whitespace-normalized with ``clean_text`` and the
    first candidate of each paragraph is kept.

    ``tokenizer``, ``model`` and ``device`` are module-level names defined
    outside this block.

    Args:
        text: The user's input text.
        variants: Number of candidates to generate per paragraph. Coerced to
            an int and limited to the range 1.._NUM_BEAMS.
        temperature: Sampling temperature, coerced to float.
        top_p: Nucleus-sampling threshold, coerced to float.

    Returns:
        The paraphrased paragraphs joined by blank lines, or a warning
        message when *text* is ``None``, empty or whitespace-only.
    """
    # Guard against None as well as blank strings.
    if not text or not text.strip():
        return "⚠️ Please enter some text"

    # Callers may pass a float; num_return_sequences must be an int and may
    # not exceed the number of beams.
    num_variants = max(1, min(int(variants), _NUM_BEAMS))

    final_output = []

    for para in split_paragraphs(text):
        # NOTE(review): the explicit " </s>" may be redundant if the
        # tokenizer already appends EOS; confirm before removing.
        input_text = "paraphrase: " + para + " </s>"
        inputs = tokenizer([input_text], return_tensors="pt", truncation=True, padding=True).to(device)

        outputs = model.generate(
            **inputs,
            max_new_tokens=256,  # more room per paragraph
            num_return_sequences=num_variants,
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
            num_beams=_NUM_BEAMS,
        )

        decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        cleaned = [clean_text(candidate) for candidate in decoded]

        # Only the first candidate is surfaced; any others are discarded.
        final_output.append(cleaned[0])

    return "\n\n".join(final_output)
50
 
51
  # --- Gradio Interface ---
# Input controls, listed in the same order as humanize_text's parameters
# (text, variants, temperature, top_p).
controls = [
    gr.Textbox(lines=10, placeholder="Paste text here..."),
    gr.Slider(1, 3, step=1, value=1, label="Variants"),
    gr.Slider(0.5, 2.0, step=0.1, value=1.0, label="Temperature"),
    gr.Slider(0.6, 1.0, step=0.01, value=0.92, label="Top-p"),
]
result_box = gr.Textbox(label="Humanized Output")

iface = gr.Interface(
    fn=humanize_text,
    inputs=controls,
    outputs=result_box,
    title=" Writenix Humanizer Pro",
    description="Paragraph-level paraphrasing with better flow and context. Combines beam search with sampling for more natural results.",
)

# Start the app.
iface.launch()