VictorM-Coder committed on
Commit
656e7b4
·
verified ·
1 Parent(s): d5b9186

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -28
app.py CHANGED
@@ -15,10 +15,11 @@ model.eval()
15
  # ------------------------
16
  # Helpers
17
  # ------------------------
18
- def split_sentences(text):
19
- # Split by punctuation
20
- sentences = re.split(r'(?<=[.!?])\s+', text.strip())
21
- return [s for s in sentences if s]
 
22
 
23
  def clean_sentence(sent):
24
  sent = re.sub(r'\s+', ' ', sent).strip()
@@ -26,12 +27,15 @@ def clean_sentence(sent):
26
  sent += "."
27
  return sent
28
 
29
- FILLERS = ["actually", "indeed", "quite", "essentially", "additionally", "remarkably"]
30
 
31
- def add_fillers(sentence):
 
 
 
32
  words = sentence.split()
33
- if len(words) > 6: # only add if long enough
34
- insert_pos = random.randint(2, min(len(words)-2, 8))
35
  filler = random.choice(FILLERS)
36
  words.insert(insert_pos, filler)
37
  return " ".join(words)
@@ -43,29 +47,41 @@ def humanize_text(text, temperature=1.0, top_p=0.92):
43
  if not text.strip():
44
  return "⚠️ Please enter some text"
45
 
46
- sentences = split_sentences(text)
47
- paraphrased_sentences = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
- for sent in sentences:
50
- input_text = "paraphrase: " + sent + " </s>"
51
- inputs = tokenizer([input_text], return_tensors="pt", truncation=True, padding=True).to(device)
52
 
53
- outputs = model.generate(
54
- **inputs,
55
- max_new_tokens=80,
56
- do_sample=True,
57
- top_p=float(top_p),
58
- temperature=float(temperature)
59
- )
60
 
61
- decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
62
- decoded = clean_sentence(decoded)
63
 
64
- # Add filler word for naturalness
65
- final_sentence = add_fillers(decoded)
66
- paraphrased_sentences.append(final_sentence)
67
 
68
- return " ".join(paraphrased_sentences)
69
 
70
  # ------------------------
71
  # Gradio Interface
@@ -78,8 +94,8 @@ iface = gr.Interface(
78
  gr.Slider(0.6, 1.0, step=0.01, value=0.92, label="Top-p"),
79
  ],
80
  outputs=gr.Textbox(label="Final Humanized Text"),
81
- title="⚡ Writenix Fast Humanizer",
82
- description="Fast pipeline: Parrot paraphraser + smart filler injection. Keeps full text, avoids truncation, adds subtle human touch."
83
  )
84
 
85
  iface.launch()
 
15
  # ------------------------
16
  # Helpers
17
  # ------------------------
18
def split_paragraphs(text):
    """Split *text* into non-empty paragraphs, one per input line.

    Lines are separated with ``str.splitlines`` so that LF, CRLF, a bare CR
    and the Unicode line/paragraph separators all count as breaks (a plain
    ``split`` on LF would glue CR-separated paragraphs together). Each line
    is stripped of surrounding whitespace, and lines that are empty after
    stripping are dropped.

    Returns a list of paragraph strings in their original order; an empty
    or whitespace-only *text* yields an empty list.
    """
    stripped_lines = (line.strip() for line in text.splitlines())
    return [line for line in stripped_lines if line]
20
+
21
def split_sentences(paragraph):
    """Split *paragraph* into sentences at whitespace following '.', '!' or '?'.

    The terminating punctuation stays attached to its sentence. Empty
    pieces are filtered out, so an empty or whitespace-only paragraph
    yields ``[]`` rather than ``['']``; callers can rely on ``len()`` of
    the result being the number of real sentences.
    """
    pieces = re.split(r'(?<=[.!?])\s+', paragraph.strip())
    return [piece for piece in pieces if piece]
23
 
24
  def clean_sentence(sent):
25
  sent = re.sub(r'\s+', ' ', sent).strip()
 
27
  sent += "."
28
  return sent
29
 
30
+ FILLERS = ["actually", "indeed", "essentially", "remarkably", "interestingly", "notably"]
31
 
32
def maybe_add_filler(sentence, add=False):
    """Return *sentence*, optionally with one random filler word inserted.

    When *add* is false the sentence is returned untouched. When *add* is
    true the sentence is split on whitespace and re-joined with single
    spaces; if it has more than six words, one entry of ``FILLERS`` is
    inserted at a random index between 2 and ``min(len(words) - 2, 8)``
    inclusive.
    """
    if add:
        tokens = sentence.split()
        if len(tokens) > 6:
            # Upper bound keeps the filler before the last two words and
            # no later than index 8.
            upper = min(len(tokens) - 2, 8)
            spot = random.randint(2, upper)
            chosen = random.choice(FILLERS)
            tokens[spot:spot] = [chosen]
        return " ".join(tokens)
    return sentence
 
def humanize_text(text, temperature=1.0, top_p=0.92):
    """Paraphrase *text* sentence by sentence, keeping its paragraph layout.

    The text is split into paragraphs and each paragraph into sentences.
    Every sentence is sent to the module-level ``model`` (via ``tokenizer``
    on ``device``) with a ``"paraphrase: "`` prefix, sampled with the given
    *temperature* and *top_p* (both coerced to ``float``) and at most 80 new
    tokens, then tidied with ``clean_sentence``.

    Exactly one sentence per paragraph is flagged for filler insertion
    through ``maybe_add_filler``.

    Returns the rewritten paragraphs joined by blank lines, or a warning
    string when *text* is empty or whitespace-only.
    """
    if not text.strip():
        return "⚠️ Please enter some text"

    final_output = []

    for paragraph in split_paragraphs(text):
        # Drop blank pieces up front so indices below refer to real sentences.
        sentences = [s for s in split_sentences(paragraph) if s.strip()]
        if not sentences:
            continue

        # Pick the filler slot ONCE per paragraph. Drawing a fresh random
        # index on every iteration could flag no sentence at all, or
        # several of them, in the same paragraph.
        filler_index = random.randrange(len(sentences))
        paraphrased_sentences = []

        for i, sent in enumerate(sentences):
            # Run through model
            input_text = "paraphrase: " + sent + " </s>"
            inputs = tokenizer(
                [input_text],
                return_tensors="pt",
                truncation=True,
                padding=True,
            ).to(device)

            outputs = model.generate(
                **inputs,
                max_new_tokens=80,
                do_sample=True,
                top_p=float(top_p),
                temperature=float(temperature),
            )

            decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
            decoded = clean_sentence(decoded)

            final_sentence = maybe_add_filler(decoded, add=(i == filler_index))
            paraphrased_sentences.append(final_sentence)

        final_output.append(" ".join(paraphrased_sentences))

    return "\n\n".join(final_output)
85
 
86
  # ------------------------
87
  # Gradio Interface
 
94
  gr.Slider(0.6, 1.0, step=0.01, value=0.92, label="Top-p"),
95
  ],
96
  outputs=gr.Textbox(label="Final Humanized Text"),
97
+ title="⚡ Writenix Humanizer (Balanced)",
98
+ description="Parrot paraphraser + subtle filler words. Injects fillers only once per paragraph for natural variation."
99
  )
100
 
101
  iface.launch()