VictorM-Coder committed on
Commit
f4119b0
·
verified ·
1 Parent(s): 656e7b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -58
app.py CHANGED
@@ -1,9 +1,7 @@
1
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
  import torch, gradio as gr, re, random
3
 
4
- # ------------------------
5
- # Load Model (Parrot T5)
6
- # ------------------------
7
  model_name = "prithivida/parrot_paraphraser_on_T5"
8
  tokenizer = AutoTokenizer.from_pretrained(model_name)
9
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
@@ -12,14 +10,12 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
12
  model = model.to(device)
13
  model.eval()
14
 
15
- # ------------------------
16
- # Helpers
17
- # ------------------------
18
  def split_paragraphs(text):
19
  return [p.strip() for p in text.split("\n") if p.strip()]
20
 
21
- def split_sentences(paragraph):
22
- return re.split(r'(?<=[.!?])\s+', paragraph.strip())
23
 
24
  def clean_sentence(sent):
25
  sent = re.sub(r'\s+', ' ', sent).strip()
@@ -27,75 +23,55 @@ def clean_sentence(sent):
27
  sent += "."
28
  return sent
29
 
30
- FILLERS = ["actually", "indeed", "essentially", "remarkably", "interestingly", "notably"]
31
-
32
- def maybe_add_filler(sentence, add=False):
33
- """Insert a filler only if flagged."""
34
- if not add:
35
- return sentence
36
- words = sentence.split()
37
- if len(words) > 6:
38
- insert_pos = random.randint(2, min(len(words) - 2, 8))
39
- filler = random.choice(FILLERS)
40
- words.insert(insert_pos, filler)
41
- return " ".join(words)
42
-
43
- # ------------------------
44
- # Main Humanizer
45
- # ------------------------
46
- def humanize_text(text, temperature=1.0, top_p=0.92):
47
  if not text.strip():
48
  return "⚠️ Please enter some text"
49
 
50
  paragraphs = split_paragraphs(text)
51
- final_output = []
52
-
53
- for paragraph in paragraphs:
54
- sentences = split_sentences(paragraph)
55
- paraphrased_sentences = []
56
 
57
- for i, sent in enumerate(sentences):
58
- if not sent.strip():
59
- continue
60
 
61
- # Run through model
62
  input_text = "paraphrase: " + sent + " </s>"
63
  inputs = tokenizer([input_text], return_tensors="pt", truncation=True, padding=True).to(device)
64
 
65
  outputs = model.generate(
66
  **inputs,
67
- max_new_tokens=80,
68
- do_sample=True,
69
- top_p=float(top_p),
70
- temperature=float(temperature)
71
  )
72
 
73
  decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
74
- decoded = clean_sentence(decoded)
75
-
76
- # Add filler only to one sentence per paragraph
77
- add_filler_here = (i == random.randint(0, max(0, len(sentences)-1)))
78
- final_sentence = maybe_add_filler(decoded, add=add_filler_here)
79
-
80
- paraphrased_sentences.append(final_sentence)
81
 
82
- final_output.append(" ".join(paraphrased_sentences))
 
 
 
83
 
84
- return "\n\n".join(final_output)
85
 
86
- # ------------------------
87
- # Gradio Interface
88
- # ------------------------
89
  iface = gr.Interface(
90
  fn=humanize_text,
91
- inputs=[
92
- gr.Textbox(lines=8, placeholder="Paste text here..."),
93
- gr.Slider(0.5, 2.0, step=0.1, value=1.0, label="Temperature"),
94
- gr.Slider(0.6, 1.0, step=0.01, value=0.92, label="Top-p"),
95
- ],
96
- outputs=gr.Textbox(label="Final Humanized Text"),
97
- title="⚡ Writenix Humanizer (Balanced)",
98
- description="Parrot paraphraser + subtle filler words. Injects fillers only once per paragraph for natural variation."
99
  )
100
 
101
  iface.launch()
 
1
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
  import torch, gradio as gr, re, random
3
 
4
+ # --- Load Model ---
 
 
5
  model_name = "prithivida/parrot_paraphraser_on_T5"
6
  tokenizer = AutoTokenizer.from_pretrained(model_name)
7
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 
10
  model = model.to(device)
11
  model.eval()
12
 
13
+ # --- Helpers ---
 
 
def split_paragraphs(text):
    """Break *text* into paragraphs, one per non-blank line.

    Every line is stripped of surrounding whitespace; lines that are empty
    after stripping are dropped, so the result never contains blank entries.
    """
    paragraphs = []
    for line in text.split("\n"):
        trimmed = line.strip()
        if trimmed:
            paragraphs.append(trimmed)
    return paragraphs
16
 
def split_sentences(text):
    """Split *text* into sentences at whitespace that follows ``.``, ``!`` or ``?``.

    The terminating punctuation stays attached to its sentence. Text without
    any sentence-ending punctuation comes back as a single-element list.

    Empty or whitespace-only input yields an empty list. A bare ``re.split``
    would return ``['']`` in that case, handing callers a phantom empty
    sentence to process.
    """
    stripped = text.strip()
    if not stripped:
        return []
    return re.split(r'(?<=[.!?])\s+', stripped)
19
 
20
  def clean_sentence(sent):
21
  sent = re.sub(r'\s+', ' ', sent).strip()
 
23
  sent += "."
24
  return sent
25
 
FILLERS = ["in fact", "notably", "interestingly", "remarkably", "as a matter of fact"]


def _lower_first_letter(sentence):
    """Return *sentence* with its first letter lower-cased when that is safe.

    The pronoun "I" (including contractions such as "I'm") and all-caps first
    words such as acronyms are left untouched, because lower-casing them
    would produce text like "In fact, i think" or "Notably, nASA".
    Other capitalised proper nouns cannot be told apart from an ordinary
    sentence-initial capital here and are still lower-cased.
    """
    words = sentence.split(None, 1)
    if not words:
        return sentence
    first_word = words[0]
    bare = first_word.rstrip(".,;:!?")
    if bare == "I" or first_word.startswith(("I'", "I\u2019")):
        return sentence
    if len(bare) > 1 and bare.isupper():
        return sentence
    return sentence[0].lower() + sentence[1:]


def inject_filler(paragraph):
    """Prefix one randomly chosen sentence of *paragraph* with a filler phrase.

    The paragraph is split with ``split_sentences``. When it has more than
    two sentences, one sentence other than the first is picked at random and
    rewritten as "<Filler>, <sentence>", with the sentence's leading capital
    lowered where that is safe. Paragraphs of one or two sentences get no
    filler.

    Returns the sentences re-joined with single spaces in every case.
    """
    sentences = split_sentences(paragraph)
    if len(sentences) > 2:
        # Index 0 is never chosen, so the paragraph's opening sentence is kept as-is.
        idx = random.randrange(1, len(sentences))
        filler = random.choice(FILLERS).capitalize()
        sentences[idx] = filler + ", " + _lower_first_letter(sentences[idx])
    return " ".join(sentences)
34
+
35
+ # --- Main function ---
def _paraphrase_sentence(sent):
    """Return the model's paraphrase of one sentence, passed through ``clean_sentence``."""
    # The tokenizer appends the end-of-sequence token on its own, so the
    # prompt must not also carry a literal "</s>" (that would feed the model
    # a duplicated EOS).
    input_text = "paraphrase: " + sent
    inputs = tokenizer([input_text], return_tensors="pt", truncation=True, padding=True).to(device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=64,
        num_beams=3,       # beam search instead of sampling
        do_sample=False,   # deterministic decoding, no temperature needed
    )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return clean_sentence(decoded)


def humanize_text(text):
    """Paraphrase *text* sentence by sentence and add one filler per paragraph.

    The input is split into paragraphs (``split_paragraphs``) and each
    paragraph into sentences (``split_sentences``). Every sentence is
    paraphrased by the model, the results are joined with single spaces,
    and ``inject_filler`` is applied to the rebuilt paragraph.

    Returns the processed paragraphs separated by a blank line, or a
    warning message when *text* is ``None``, empty, or only whitespace.
    """
    # `not text` also covers None, which would otherwise fail on .strip().
    if not text or not text.strip():
        return "⚠️ Please enter some text"

    final_paragraphs = []
    for para in split_paragraphs(text):
        out_sentences = [
            _paraphrase_sentence(sent)
            for sent in split_sentences(para)
            if sent.strip()  # never send a blank sentence to the model
        ]

        # Rebuild paragraph + add filler once
        final_paragraphs.append(inject_filler(" ".join(out_sentences)))

    return "\n\n".join(final_paragraphs)
67
 
68
+ # --- Gradio Interface ---
 
 
# Single text box in, single text box out, wired to humanize_text.
_input_box = gr.Textbox(lines=10, placeholder="Paste text here...")
_output_box = gr.Textbox(label="Humanized Output")

iface = gr.Interface(
    fn=humanize_text,
    inputs=_input_box,
    outputs=_output_box,
    title="⚡ Writenix Fast Humanizer",
    description="Stable & fast humanizer: deterministic paraphrasing + light filler once per paragraph.",
)

# Start the Gradio app.
iface.launch()