VictorM-Coder committed on
Commit
d5b9186
·
verified ·
1 Parent(s): aef9a98

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -68
app.py CHANGED
@@ -1,29 +1,22 @@
1
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
2
- import torch, gradio as gr, re
3
 
4
  # ------------------------
5
- # Load Models
6
  # ------------------------
7
- # Stage 1: Paraphraser (Parrot)
8
- paraphrase_model_name = "prithivida/parrot_paraphraser_on_T5"
9
- paraphrase_tokenizer = AutoTokenizer.from_pretrained(paraphrase_model_name)
10
- paraphrase_model = AutoModelForSeq2SeqLM.from_pretrained(paraphrase_model_name)
11
-
12
- # Stage 2: Lightweight Expander (flan-t5-small)
13
- expander = pipeline(
14
- "text2text-generation",
15
- model="google/flan-t5-small",
16
- device=0 if torch.cuda.is_available() else -1
17
- )
18
 
19
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20
- paraphrase_model = paraphrase_model.to(device)
21
- paraphrase_model.eval()
22
 
23
  # ------------------------
24
  # Helpers
25
  # ------------------------
26
  def split_sentences(text):
 
27
  sentences = re.split(r'(?<=[.!?])\s+', text.strip())
28
  return [s for s in sentences if s]
29
 
@@ -33,82 +26,60 @@ def clean_sentence(sent):
33
  sent += "."
34
  return sent
35
 
 
 
 
 
 
 
 
 
 
 
36
  # ------------------------
37
- # Stage 1: Paraphrase
38
  # ------------------------
39
- def paraphrase_fn(text, num_return_sequences=1, temperature=1.2, top_p=0.92):
 
 
 
40
  sentences = split_sentences(text)
41
- all_outputs = []
42
 
43
  for sent in sentences:
44
  input_text = "paraphrase: " + sent + " </s>"
45
- inputs = paraphrase_tokenizer([input_text], return_tensors="pt", truncation=True, padding=True).to(device)
46
 
47
- outputs = paraphrase_model.generate(
48
  **inputs,
49
- max_new_tokens=64,
50
- num_return_sequences=int(num_return_sequences),
51
  do_sample=True,
52
  top_p=float(top_p),
53
- temperature=float(temperature),
54
- min_length=10,
55
- length_penalty=1.0
56
  )
57
- decoded = paraphrase_tokenizer.batch_decode(outputs, skip_special_tokens=True)
58
-
59
- seen, unique = set(), []
60
- for d in decoded:
61
- d = clean_sentence(d)
62
- if d not in seen:
63
- unique.append(d)
64
- seen.add(d)
65
-
66
- if unique:
67
- all_outputs.append(unique[0])
68
-
69
- return " ".join(all_outputs).strip()
70
-
71
- # ------------------------
72
- # Stage 2: Light Expansion
73
- # ------------------------
74
- def expand_text(text, temperature=0.7, top_p=0.9):
75
- expanded = expander(
76
- f"Lightly enhance this text by adding small natural words, transitions, or adjectives (like 'actually', 'quite', 'additionally', 'really'). Do NOT rewrite completely:\n{text}",
77
- max_new_tokens=80,
78
- temperature=float(temperature),
79
- top_p=float(top_p)
80
- )[0]['generated_text']
81
- return expanded.strip()
82
-
83
- # ------------------------
84
- # Final Pipeline
85
- # ------------------------
86
- def humanize_pipeline(text, variants=1, temperature=1.2, top_p=0.92):
87
- if not text.strip():
88
- return "⚠️ Please enter some text"
89
 
90
- # Stage 1: Paraphrase
91
- base = paraphrase_fn(text, num_return_sequences=variants, temperature=temperature, top_p=top_p)
92
 
93
- # Stage 2: Light Expansion
94
- expanded = expand_text(base, temperature=temperature, top_p=top_p)
 
95
 
96
- return expanded
97
 
98
  # ------------------------
99
  # Gradio Interface
100
  # ------------------------
101
  iface = gr.Interface(
102
- fn=humanize_pipeline,
103
  inputs=[
104
  gr.Textbox(lines=8, placeholder="Paste text here..."),
105
- gr.Slider(1, 3, step=1, value=1, label="Variants"),
106
- gr.Slider(0.5, 2.0, step=0.1, value=1.2, label="Temperature"),
107
  gr.Slider(0.6, 1.0, step=0.01, value=0.92, label="Top-p"),
108
  ],
109
  outputs=gr.Textbox(label="Final Humanized Text"),
110
- title="📝 Writenix Humanizer v3 (Light Mode)",
111
- description="Two-stage pipeline: Paraphrase + Subtle Expansion. Adds natural filler words, transitions, and adjectives instead of rewriting everything."
112
  )
113
 
114
  iface.launch()
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
+ import torch, gradio as gr, re, random
3
 
4
# ------------------------
# Load Model (Parrot T5)
# ------------------------
# Parrot: a T5 model fine-tuned for paraphrasing; downloaded from the
# HuggingFace Hub on first run (network side effect at import time).
model_name = "prithivida/parrot_paraphraser_on_T5"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Prefer GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# Inference only: eval() disables dropout and other train-time behavior.
model.eval()
14
 
15
  # ------------------------
16
  # Helpers
17
  # ------------------------
18
def split_sentences(text):
    """Split *text* into sentences at '.', '!' or '?' followed by whitespace.

    Leading/trailing whitespace is stripped first; empty fragments are
    dropped from the result.
    """
    fragments = re.split(r'(?<=[.!?])\s+', text.strip())
    return list(filter(None, fragments))
22
 
 
26
  sent += "."
27
  return sent
28
 
29
FILLERS = ["actually", "indeed", "quite", "essentially", "additionally", "remarkably"]

def add_fillers(sentence):
    """Insert one random filler word into *sentence* for a more human feel.

    Sentences of 6 words or fewer are left untouched (aside from whitespace
    normalization from split/join). The insertion point is chosen between
    word index 2 and min(len-2, 8) so fillers land mid-sentence.
    """
    tokens = sentence.split()
    if len(tokens) > 6:
        # Keep the random-call order (randint, then choice) stable.
        spot = random.randint(2, min(len(tokens) - 2, 8))
        tokens.insert(spot, random.choice(FILLERS))
    return " ".join(tokens)
38
+
39
  # ------------------------
40
+ # Main Humanizer
41
  # ------------------------
42
def humanize_text(text, temperature=1.0, top_p=0.92):
    """Paraphrase *text* sentence by sentence with the Parrot T5 model,
    then inject a filler word per sentence for a subtle human touch.

    Parameters:
        text: input text; blank input returns a warning string instead.
        temperature: sampling temperature forwarded to ``model.generate``.
        top_p: nucleus-sampling threshold forwarded to ``model.generate``.

    Returns the paraphrased sentences re-joined with single spaces.
    """
    if not text.strip():
        return "⚠️ Please enter some text"

    results = []
    for sentence in split_sentences(text):
        # Parrot expects the "paraphrase:" task prefix on each input.
        prompt = "paraphrase: " + sentence + " </s>"
        encoded = tokenizer([prompt], return_tensors="pt", truncation=True, padding=True).to(device)

        generated = model.generate(
            **encoded,
            max_new_tokens=80,
            do_sample=True,
            top_p=float(top_p),
            temperature=float(temperature),
        )

        # Only the first (and only) candidate sequence is used.
        candidate = tokenizer.decode(generated[0], skip_special_tokens=True)
        candidate = clean_sentence(candidate)

        # Add filler word for naturalness
        results.append(add_fillers(candidate))

    return " ".join(results)
69
 
70
  # ------------------------
71
  # Gradio Interface
72
  # ------------------------
73
# Build the UI pieces first, then wire them into the Interface.
input_widgets = [
    gr.Textbox(lines=8, placeholder="Paste text here..."),
    gr.Slider(0.5, 2.0, step=0.1, value=1.0, label="Temperature"),
    gr.Slider(0.6, 1.0, step=0.01, value=0.92, label="Top-p"),
]
output_widget = gr.Textbox(label="Final Humanized Text")

iface = gr.Interface(
    fn=humanize_text,
    inputs=input_widgets,
    outputs=output_widget,
    title=" Writenix Fast Humanizer",
    description="Fast pipeline: Parrot paraphraser + smart filler injection. Keeps full text, avoids truncation, adds subtle human touch.",
)

iface.launch()