VictorM-Coder committed on
Commit
da07e2a
·
verified ·
1 Parent(s): 33b7120

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -29
app.py CHANGED
@@ -14,52 +14,53 @@ model.eval()
14
  def split_paragraphs(text):
15
  return [p.strip() for p in text.split("\n") if p.strip()]
16
 
 
 
 
17
  def clean_text(text):
18
- text = re.sub(r'\s+', ' ', text).strip()
19
- return text
 
 
 
 
 
 
 
 
 
20
 
21
  # --- Main Humanizer ---
22
- def humanize_text(text, variants=1, temperature=1.0, top_p=0.92):
23
  if not text.strip():
24
  return "⚠️ Please enter some text"
25
 
26
  paragraphs = split_paragraphs(text)
27
- final_output = []
28
 
29
  for para in paragraphs:
30
- input_text = "paraphrase: " + para + " </s>"
31
- inputs = tokenizer([input_text], return_tensors="pt", truncation=True, padding=True).to(device)
32
-
33
- outputs = model.generate(
34
- **inputs,
35
- max_new_tokens=256, # More room per paragraph
36
- num_return_sequences=variants,
37
- do_sample=True,
38
- temperature=float(temperature),
39
- top_p=float(top_p),
40
- num_beams=4, # balance between stable & creative
41
- )
42
 
43
- decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
44
- cleaned = [clean_text(d) for d in decoded]
 
45
 
46
- # Pick the first variant for simplicity
47
- final_output.append(cleaned[0])
 
 
 
48
 
49
- return "\n\n".join(final_output)
50
 
51
  # --- Gradio Interface ---
52
  iface = gr.Interface(
53
  fn=humanize_text,
54
- inputs=[
55
- gr.Textbox(lines=10, placeholder="Paste text here..."),
56
- gr.Slider(1, 3, step=1, value=1, label="Variants"),
57
- gr.Slider(0.5, 2.0, step=0.1, value=1.0, label="Temperature"),
58
- gr.Slider(0.6, 1.0, step=0.01, value=0.92, label="Top-p"),
59
- ],
60
  outputs=gr.Textbox(label="Humanized Output"),
61
- title="✨ Writenix Humanizer Pro",
62
- description="Paragraph-level paraphrasing with better flow and context. Combines beam search with sampling for more natural results."
63
  )
64
 
65
  iface.launch()
 
14
  def split_paragraphs(text):
15
  return [p.strip() for p in text.split("\n") if p.strip()]
16
 
17
+ def split_sentences(text):
18
+ return re.split(r'(?<=[.!?])\s+', text.strip())
19
+
20
  def clean_text(text):
21
+ return re.sub(r'\s+', ' ', text).strip()
22
+
23
+ def paraphrase_chunk(text_chunk):
24
+ inputs = tokenizer([text_chunk], return_tensors="pt", truncation=True, padding=True).to(device)
25
+ outputs = model.generate(
26
+ **inputs,
27
+ max_new_tokens=100, # small chunks only
28
+ num_beams=4,
29
+ do_sample=False
30
+ )
31
+ return clean_text(tokenizer.decode(outputs[0], skip_special_tokens=True))
32
 
33
  # --- Main Humanizer ---
34
+ def humanize_text(text):
35
  if not text.strip():
36
  return "⚠️ Please enter some text"
37
 
38
  paragraphs = split_paragraphs(text)
39
+ humanized_paragraphs = []
40
 
41
  for para in paragraphs:
42
+ sentences = split_sentences(para)
43
+ paraphrased_sentences = []
 
 
 
 
 
 
 
 
 
 
44
 
45
+ # Paraphrase each sentence separately for accuracy
46
+ for sent in sentences:
47
+ paraphrased_sentences.append(paraphrase_chunk("paraphrase: " + sent))
48
 
49
+ # Rebuild paragraph and optionally add light filler once
50
+ new_para = " ".join(paraphrased_sentences)
51
+ if len(paraphrased_sentences) > 2:
52
+ new_para = "In fact, " + new_para[0].lower() + new_para[1:]
53
+ humanized_paragraphs.append(new_para)
54
 
55
+ return "\n\n".join(humanized_paragraphs)
56
 
57
  # --- Gradio Interface ---
58
  iface = gr.Interface(
59
  fn=humanize_text,
60
+ inputs=gr.Textbox(lines=15, placeholder="Paste text here..."),
 
 
 
 
 
61
  outputs=gr.Textbox(label="Humanized Output"),
62
+ title="✨ Writenix Humanizer v3",
63
+ description="Paraphrases large text paragraph-by-paragraph for natural flow, keeps full content, adds light filler per paragraph."
64
  )
65
 
66
  iface.launch()