tyfsadik committed on
Commit
ca86bd0
Β·
verified Β·
1 Parent(s): db9cc02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1451 -380
app.py CHANGED
@@ -1,418 +1,1489 @@
1
- # app.py - Advanced Deep Humanizer for Hugging Face Spaces
2
- # Compatible with Gradio 4.0.0 and Python 3.13
3
- # Optimized for Zero-GPU / GPU hybrid deployment
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  import gradio as gr
 
6
  import torch
7
- import random
8
- import re
9
- import os
10
- import gc
11
  from transformers import (
12
- AutoModelForCausalLM,
13
- AutoTokenizer,
14
- BitsAndBytesConfig
 
 
 
15
  )
16
- from typing import Dict, Tuple
17
- import numpy as np
18
- from dataclasses import dataclass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
 
 
 
20
  @dataclass
21
- class HumanizationConfig:
22
- temperature: float = 0.8
23
- top_p: float = 0.92
24
- repetition_penalty: float = 1.15
25
- style_intensity: str = "medium"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  preserve_meaning: bool = True
27
  add_imperfections: bool = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- class DeepHumanizer:
30
  def __init__(self):
31
- # Using Qwen2.5-7B for best quality/speed on HF Spaces
32
- # This works on T4 (16GB) GPU or CPU with quantization
33
- self.model_id = "Qwen/Qwen2.5-7B-Instruct"
34
-
35
- self.tokenizer = None
36
- self.model = None
37
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
38
-
39
- if self.device == "cuda":
40
- print(f"πŸš€ GPU available: {torch.cuda.get_device_name(0)}")
41
- print(f"πŸ“Š VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
42
- else:
43
- print("⚠️ Running on CPU - will use lighter quantization")
44
-
45
- self.initialize_model()
46
-
47
- def initialize_model(self):
48
- """Initialize with robust memory management"""
49
- print(f"Loading {self.model_id}...")
50
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  try:
52
- self.tokenizer = AutoTokenizer.from_pretrained(
53
- self.model_id,
54
- trust_remote_code=True,
55
- padding_side="left"
56
  )
57
-
58
- if self.tokenizer.pad_token is None:
59
- self.tokenizer.pad_token = self.tokenizer.eos_token
60
-
61
- # Memory-aware loading
62
- if self.device == "cuda":
63
- vram = torch.cuda.get_device_properties(0).total_memory / 1e9
64
-
65
- if vram > 40: # A100 40GB/80GB
66
- # Can handle 7B in 4-bit easily
67
- quantization_config = BitsAndBytesConfig(
68
- load_in_4bit=True,
69
- bnb_4bit_compute_dtype=torch.float16,
70
- bnb_4bit_quant_type="nf4",
71
- bnb_4bit_use_double_quant=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  )
73
- self.model = AutoModelForCausalLM.from_pretrained(
74
- self.model_id,
75
- quantization_config=quantization_config,
 
 
 
76
  device_map="auto",
 
77
  trust_remote_code=True,
78
- torch_dtype=torch.float16,
79
- low_cpu_mem_usage=True
80
  )
81
- elif vram > 15: # T4 16GB or similar
82
- # 4-bit for safety
83
- quantization_config = BitsAndBytesConfig(
84
- load_in_4bit=True,
85
- bnb_4bit_compute_dtype=torch.float16,
86
- bnb_4bit_quant_type="nf4",
87
- bnb_4bit_use_double_quant=True,
88
  )
89
- self.model = AutoModelForCausalLM.from_pretrained(
90
- self.model_id,
91
- quantization_config=quantization_config,
 
 
 
 
 
 
 
92
  device_map="auto",
93
- trust_remote_code=True,
94
- low_cpu_mem_usage=True
95
  )
96
- else:
97
- # 8-bit for safety on smaller GPUs
98
- self.model = AutoModelForCausalLM.from_pretrained(
99
- self.model_id,
100
- load_in_8bit=True,
101
  device_map="auto",
102
- trust_remote_code=True,
103
- low_cpu_mem_usage=True
104
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  else:
106
- # CPU mode - aggressive quantization
107
- self.model = AutoModelForCausalLM.from_pretrained(
108
- self.model_id,
109
- torch_dtype=torch.float32,
110
- device_map="cpu",
111
- low_cpu_mem_usage=True
112
  )
113
-
114
- self.model.eval()
115
- print("βœ… Model loaded successfully")
116
-
117
  except Exception as e:
118
- print(f"❌ Error loading model: {e}")
119
- raise
120
-
121
- def analyze_text_patterns(self, text: str) -> Dict:
122
- """Analyze writing patterns"""
123
- sentences = re.split(r'(?<=[.!?])\s+', text)
124
- words = text.split()
125
-
126
- # Burstiness calculation
127
- if len(sentences) > 1:
128
- sent_lengths = [len(s.split()) for s in sentences]
129
- burstiness = np.std(sent_lengths) / (np.mean(sent_lengths) + 1e-8)
130
- else:
131
- burstiness = 0
132
-
133
- # AI markers detection
134
- ai_patterns = [
135
- r'\b(delve|leverage|utilize|facilitate|optimize|embark|journey|seamless)\b',
136
- r'\b(In conclusion|Furthermore|Moreover|Additionally|Thus|Therefore)\b',
137
- r'\b(It is important to note that|It should be noted that)\b',
138
- r'\b(In today\'s world|In the digital age)\b',
139
- r'\b(As an AI|As a language model)\b',
140
- ]
141
-
142
- pattern_matches = sum(len(re.findall(p, text, re.I)) for p in ai_patterns)
143
-
144
- return {
145
- "burstiness": burstiness,
146
- "avg_sentence_length": np.mean([len(s.split()) for s in sentences]) if sentences else 0,
147
- "ai_markers": pattern_matches,
148
- "word_count": len(words),
149
- }
150
-
151
- def generate_humanization_prompt(self, text: str, config: HumanizationConfig,
152
- style: str, analysis: Dict) -> Tuple[str, str]:
153
- """Generate prompts"""
154
-
155
- style_instructions = {
156
- "casual": "Sound like a knowledgeable friend. Contractions, everyday words, conversational flow.",
157
- "professional": "Smart colleague tone. Clear but warm, not robotic.",
158
- "academic": "Passionate professor speaking. Scholarly but accessible, personal perspective.",
159
- "creative": "Vivid, rhythmic, metaphors. Show don't tell.",
160
- "internet": "Authentic web voice. Blog style, informative but casual.",
161
- "genz": "Modern casual. Direct, slightly irreverent, very natural."
162
- }
163
-
164
- style_instruction = style_instructions.get(style, style_instructions["casual"])
165
-
166
- de_ai_specific = ""
167
- if analysis["ai_markers"] > 0:
168
- de_ai_specific = f"""
169
- REMOVE {analysis['ai_markers']} AI MARKERS DETECTED:
170
- - Replace: delve→look into, leverage→use, utilize→use, facilitate→help, optimize→improve
171
- - Remove: "In conclusion", "Furthermore", "It is important to note"
172
- """
173
-
174
- imperfections = ""
175
  if config.add_imperfections:
176
- imperfections = """
177
- ADD NATURAL IMPERFECTIONS:
178
- - Start some sentences with "But", "And", "So", "Yet"
179
- - Use occasional fragments
180
- - Include contractions (don't, can't, it's)
181
- - Add one rhetorical question
182
- - Use varied punctuation (β€”, ...)
183
- """
184
-
185
- system_msg = f"""You transform AI text into authentic human writing. {style_instruction}
186
-
187
- {de_ai_specific}
188
- {imperfections}
189
 
190
- RULES:
191
- 1. Keep all original information
192
- 2. Vary sentence lengths dramatically (short + long mix)
193
- 3. Use active voice
194
- 4. Add subtle opinion ("I think", "probably")
195
- 5. Output ONLY the rewritten text, no explanations, no quotes"""
196
-
197
- user_msg = f"Humanize this text:\n\n{text}"
198
-
199
- return system_msg, user_msg
200
-
201
- def humanize(self, text: str, style: str = "casual", intensity: str = "medium",
202
- creativity: float = 0.8, add_imperfections: bool = True) -> Tuple[str, Dict]:
203
- """Main pipeline"""
204
- if not text.strip():
205
- return "", {"error": "Empty input"}
206
-
207
- config = HumanizationConfig(
208
- temperature=creativity,
209
- style_intensity=intensity,
210
- add_imperfections=add_imperfections
211
  )
212
-
213
- # Analysis
214
- analysis = self.analyze_text_patterns(text)
215
-
216
- # Generate prompt
217
- system_msg, user_msg = self.generate_humanization_prompt(text, config, style, analysis)
218
-
219
- # Qwen chat format
220
- messages = [
221
- {"role": "system", "content": system_msg},
222
- {"role": "user", "content": user_msg}
223
- ]
224
-
225
- prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
226
- inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
227
- inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
228
-
229
- # Generate
230
- with torch.no_grad():
231
- outputs = self.model.generate(
232
- **inputs,
233
- max_new_tokens=min(len(text.split()) * 2, 1024),
234
- temperature=config.temperature,
235
- top_p=config.top_p,
236
- repetition_penalty=config.repetition_penalty,
237
- do_sample=True,
238
- pad_token_id=self.tokenizer.pad_token_id,
239
- eos_token_id=self.tokenizer.eos_token_id,
240
- )
241
-
242
- # Decode
243
- full_output = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
244
- assistant_response = full_output.split("assistant")[-1].strip()
245
- if assistant_response.startswith(":"):
246
- assistant_response = assistant_response[1:].strip()
247
-
248
- # Post-process
249
- if intensity == "aggressive":
250
- assistant_response = self._aggressive_variation(assistant_response)
251
- elif intensity == "light":
252
- assistant_response = self._light_cleanup(assistant_response)
253
-
254
- # Metrics
255
- final_analysis = self.analyze_text_patterns(assistant_response)
256
- human_score = self._calculate_human_score(assistant_response, final_analysis, analysis)
257
-
258
- metrics = {
259
- "ai_markers_removed": analysis["ai_markers"] - final_analysis["ai_markers"],
260
- "burstiness": round(final_analysis["burstiness"], 2),
261
- "human_score": human_score,
262
- "intensity": intensity,
263
- }
264
-
265
- # Cleanup
266
- if self.device == "cuda":
267
- torch.cuda.empty_cache()
268
- gc.collect()
269
-
270
- return assistant_response, metrics
271
-
272
- def _aggressive_variation(self, text: str) -> str:
273
- """Add variation"""
274
- # Combine sentences occasionally
275
- text = re.sub(r'([.])\s+([A-Z])', lambda m: f", and {m.group(2).lower()}" if random.random() > 0.8 else f". {m.group(1)}", text)
276
-
277
- # Add fragments
278
- sentences = text.split('. ')
279
- if len(sentences) > 3 and random.random() > 0.5:
280
- idx = random.randint(1, len(sentences)-2)
281
- words = sentences[idx].split()
282
- if len(words) > 4:
283
- sentences[idx] = ' '.join(words[:2]) + "..."
284
- return '. '.join(sentences)
285
-
286
- def _light_cleanup(self, text: str) -> str:
287
- """Minimal cleanup"""
288
- text = re.sub(r'\b(In conclusion|To summarize|Overall),\s*', '', text, flags=re.I)
289
- text = re.sub(r'\b(it is important to note that)\s*', '', text, flags=re.I)
290
- return text.strip()
291
-
292
- def _calculate_human_score(self, text: str, final: Dict, original: Dict) -> int:
293
- """Score 0-100"""
294
- score = 75
295
- score -= original["ai_markers"] * 10
296
- score += (original["ai_markers"] - final["ai_markers"]) * 15
297
-
298
- if final["burstiness"] > 0.4:
299
- score += 15
300
- elif final["burstiness"] > 0.2:
301
- score += 8
302
-
303
- contractions = len(re.findall(r"\b\w+'\w+\b", text))
304
- if contractions >= 2:
305
- score += 10
306
-
307
- return max(0, min(100, score))
308
-
309
- # Initialize
310
- print("πŸ”„ Initializing...")
311
- try:
312
- humanizer = DeepHumanizer()
313
- except Exception as e:
314
- print(f"⚠️ Initialization error: {e}")
315
- humanizer = None
316
-
317
- def process_text(text, style, intensity, creativity, add_imperfections):
318
- """Handler"""
319
- if humanizer is None:
320
- return "❌ Model failed to load", "<div style='color:red'>Initialization error</div>"
321
-
322
- if not text or len(text.strip()) < 10:
323
- return "⚠️ Enter at least 10 characters", ""
324
-
325
- try:
326
- humanized, metrics = humanizer.humanize(
327
- text=text, style=style, intensity=intensity,
328
- creativity=creativity, add_imperfections=add_imperfections
329
  )
330
-
331
- status_color = "#22c55e" if metrics["human_score"] > 80 else "#eab308" if metrics["human_score"] > 60 else "#ef4444"
332
-
333
- metrics_html = f"""
334
- <div style="border: 2px solid {status_color}; border-radius: 8px; padding: 15px; margin-top: 10px;">
335
- <h4 style="margin-top: 0; color: {status_color};">πŸ“Š Human Score: {metrics['human_score']}/100</h4>
336
- <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; font-size: 14px;">
337
- <div>🎯 AI markers removed: {metrics['ai_markers_removed']}</div>
338
- <div>πŸ“ˆ Burstiness: {metrics['burstiness']}</div>
339
- <div>⚑ Intensity: {metrics['intensity'].title()}</div>
340
- <div>🎭 Style: {style.title()}</div>
341
- </div>
342
- </div>
343
- """
344
-
345
- return humanized, metrics_html
346
-
347
- except Exception as e:
348
- return f"❌ Error: {str(e)}", f"<div style='color:red'>{str(e)}</div>"
349
-
350
- # Gradio UI (4.0.0 compatible syntax)
351
- css = """
352
- .output-box { min-height: 200px; }
353
- .metric-box { background: #f9fafb; padding: 10px; border-radius: 5px; }
354
- """
355
 
356
- with gr.Blocks(css=css, title="Deep Humanizer Pro") as demo:
357
- gr.Markdown("""
358
- # 🧠 Deep Humanizer Pro
359
- ### Advanced AI-to-Human Text Transformation
360
- *Powered by Qwen2.5-7B with linguistic analysis*
361
- """)
362
-
363
- with gr.Row():
364
- with gr.Column(scale=1):
365
- input_text = gr.Textbox(
366
- label="πŸ“ Input (AI text)",
367
- placeholder="Paste AI-generated content here...",
368
- lines=10
369
- )
370
-
371
- with gr.Row():
372
- style = gr.Dropdown(
373
- choices=["casual", "professional", "academic", "creative", "internet", "genz"],
374
- value="casual",
375
- label="🎭 Style"
 
 
 
 
 
376
  )
377
- intensity = gr.Radio(
378
- choices=["light", "medium", "aggressive"],
379
- value="medium",
380
- label="⚑ Intensity"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
  )
382
-
383
- creativity = gr.Slider(
384
- minimum=0.1, maximum=1.0, value=0.8, step=0.1,
385
- label="🎨 Creativity"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  )
387
- add_imperfections = gr.Checkbox(
388
- label="✨ Natural Imperfections",
389
- value=True
 
 
390
  )
391
-
392
- submit_btn = gr.Button("πŸš€ Humanize", variant="primary")
393
-
394
- with gr.Column(scale=1):
395
- output_text = gr.Textbox(
396
- label="βœ… Humanized Output",
397
- lines=10,
398
- elem_classes=["output-box"]
399
  )
400
- metrics_display = gr.HTML()
401
-
402
- gr.Examples(
403
- examples=[
404
- ["Artificial Intelligence refers to the simulation of human intelligence in machines programmed to think like humans.", "casual", "medium"],
405
- ["In conclusion, it is important to note that leveraging cutting-edge technologies facilitates optimal outcomes.", "professional", "aggressive"],
406
- ],
407
- inputs=[input_text, style, intensity],
408
- label="🎯 Examples"
409
- )
410
-
411
- submit_btn.click(
412
- fn=process_text,
413
- inputs=[input_text, style, intensity, creativity, add_imperfections],
414
- outputs=[output_text, metrics_display]
415
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
 
417
  if __name__ == "__main__":
418
- demo.launch()
 
1
+ """
2
+ 🧬 Advanced AI Text Humanizer
3
+ ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
4
+ Multi-model ensemble humanization pipeline for Hugging Face Spaces.
5
+ Uses state-of-the-art LLMs with multiple rewriting strategies,
6
+ style transfer, readability optimization, and AI-detection evasion.
7
+
8
+ Models Used (in ensemble pipeline):
9
+ 1. meta-llama/Llama-3.3-70B-Instruct β€” Primary rewriter
10
+ 2. mistralai/Mistral-7B-Instruct-v0.3 β€” Secondary rewriter
11
+ 3. HuggingFaceH4/zephyr-7b-beta β€” Style transfer
12
+ 4. facebook/bart-large-cnn β€” Paraphrase refinement
13
+ 5. SentenceTransformers for similarity scoring
14
+
15
+ Author: Advanced Humanizer Pipeline
16
+ Space Hardware: GPU A100 (paid config)
17
+ """
18
+
19
+ import os
20
+ import re
21
+ import json
22
+ import time
23
+ import random
24
+ import logging
25
+ import hashlib
26
+ import textwrap
27
+ import difflib
28
+ from typing import Optional, List, Dict, Tuple, Any
29
+ from dataclasses import dataclass, field
30
+ from concurrent.futures import ThreadPoolExecutor, as_completed
31
+ from collections import Counter
32
 
33
  import gradio as gr
34
+ import numpy as np
35
  import torch
 
 
 
 
36
  from transformers import (
37
+ AutoTokenizer,
38
+ AutoModelForCausalLM,
39
+ AutoModelForSeq2SeqLM,
40
+ pipeline,
41
+ TextGenerationPipeline,
42
+ set_seed,
43
  )
44
+ from transformers.generation.utils import GenerationConfig
45
+ from huggingface_hub import InferenceClient
46
+ import requests
47
+ from sentence_transformers import SentenceTransformer
48
+ import nltk
49
+ from nltk.tokenize import sent_tokenize, word_tokenize
50
+ from nltk.corpus import stopwords
51
+ from readability import Readability
52
+
53
+ # ─────────────────────────────────────────────
54
+ # Download NLTK data
55
+ # ─────────────────────────────────────────────
56
# Map each resource to its on-disk category: nltk.data.find() needs the
# category prefix, and only the punkt tokenizers live under "tokenizers/".
# (The previous code probed "tokenizers/stopwords" etc., which always raised
# LookupError and therefore re-downloaded every resource on every start.)
_NLTK_RESOURCES = {
    "punkt": "tokenizers/punkt",
    "punkt_tab": "tokenizers/punkt_tab",
    "stopwords": "corpora/stopwords",
    "averaged_perceptron_tagger": "taggers/averaged_perceptron_tagger",
}
for _resource_name, _resource_path in _NLTK_RESOURCES.items():
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_resource_name, quiet=True)
61
+
62
+ # ─────────────────────────────────────────────
63
+ # Configuration
64
+ # ─────────────────────────────────────────────
65
# Hugging Face API token for gated models; empty string when the env var is unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Prefer GPU when available; several components below re-derive this themselves.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Root logging setup for the whole app; individual modules pull named loggers.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
logger = logging.getLogger("humanizer")
73
 
74
+ # ─────────────────────────────────────────────
75
+ # Data Classes
76
+ # ─────────────────────────────────────────────
77
@dataclass
class HumanizationResult:
    """Full record of one humanization run: input, output, and quality metrics."""

    original: str                           # input text as submitted
    humanized: str                          # rewritten output text
    model_used: str                         # identifier of the model that produced the rewrite
    mode: str                               # rewriting mode/style key (e.g. "balanced")
    changes_made: int                       # presumably a diff-based edit count — verify against producer
    similarity_score: float                 # semantic similarity to the original, 0-1
    readability_before: Dict[str, float]    # readability metrics of the input
    readability_after: Dict[str, float]     # readability metrics of the output
    ai_probability_before: float            # detector's AI-probability for the input, 0-1
    ai_probability_after: float             # detector's AI-probability for the output, 0-1
    processing_time: float                  # wall-clock seconds for the run
    strategies_applied: List[str]           # names of rewriting strategies that were applied
    word_count_before: int                  # whitespace-token count of input
    word_count_after: int                   # whitespace-token count of output
    perplexity_before: float                # pseudo-perplexity metric of input (sentence-length based)
    perplexity_after: float                 # pseudo-perplexity metric of output
95
+
96
+
97
@dataclass
class PipelineConfig:
    """Tunable knobs for the humanization pipeline (defaults = balanced preset)."""

    mode: str = "balanced"              # prompt-template/style key
    intensity: float = 0.5              # presumably 0-1 rewrite strength — confirm against consumer
    preserve_meaning: bool = True       # keep original information intact
    add_imperfections: bool = True      # allow human-like imperfections in output
    vary_sentence_length: bool = True   # encourage mixed short/long sentences
    add_transitions: bool = True        # insert natural transition phrases
    remove_patterns: bool = True        # strip known AI-marker phrasing
    add_personal_touch: bool = True     # inject personal-sounding language
    temperature: float = 0.7            # LLM sampling temperature
    top_p: float = 0.9                  # nucleus-sampling cutoff
    max_tokens: int = 2048              # generation token budget
    ensemble: bool = True               # combine multiple models per run
    use_all_models: bool = True         # run every registry model rather than a subset
112
+
113
+
114
+ # ─────────────────────────────────────────────
115
+ # Model Registry
116
+ # ─────────────────────────────────────────────
117
# Registry of candidate rewriter models. Keys are internal slugs; each entry
# holds the hub id, a display name, the loading "type" ("chat" = causal chat
# model, "seq2seq" = encoder-decoder), a context length in tokens, and a blurb.
# NOTE(review): the large entries (Llama 3.3 70B, Gemma 2 27B) are gated on the
# Hub and require HF_TOKEN plus an accepted license — confirm access.
MODEL_REGISTRY = {
    "llama_3_70b": {
        "id": "meta-llama/Llama-3.3-70B-Instruct",
        "name": "Llama 3.3 70B Instruct",
        "type": "chat",
        "max_length": 8192,
        "description": "Primary powerhouse model for deep rewriting",
    },
    "mistral_7b": {
        "id": "mistralai/Mistral-7B-Instruct-v0.3",
        "name": "Mistral 7B Instruct v0.3",
        "type": "chat",
        "max_length": 32768,
        "description": "Fast and creative secondary model",
    },
    "zephyr_7b": {
        "id": "HuggingFaceH4/zephyr-7b-beta",
        "name": "Zephyr 7B Beta",
        "type": "chat",
        "max_length": 4096,
        "description": "Excellent style transfer capabilities",
    },
    "phi_3_mini": {
        "id": "microsoft/Phi-3-mini-128k-instruct",
        "name": "Phi-3 Mini 128K",
        "type": "chat",
        "max_length": 128000,
        "description": "Lightweight model for quick passes",
    },
    "bart_paraphrase": {
        "id": "facebook/bart-large-cnn",
        "name": "BART Large CNN",
        "type": "seq2seq",
        "max_length": 1024,
        # NOTE(review): bart-large-cnn is a summarization checkpoint, not a
        # dedicated paraphraser — verify it behaves as intended here.
        "description": "Specialized paraphrasing model",
    },
    "gemma_2_27b": {
        "id": "google/gemma-2-27b-it",
        "name": "Gemma 2 27B IT",
        "type": "chat",
        "max_length": 8192,
        "description": "Google's instruction-tuned model",
    },
}
161
+
162
+
163
+ # ─────────────────────────────────────────────
164
+ # AI Detection Model
165
+ # ─────────────────────────────────────────────
166
class AIDetector:
    """Estimates probability that text is AI-generated.

    Tries to load a RoBERTa-based detector checkpoint; if loading fails
    (no network, missing weights, incompatible environment) every call
    falls back to a regex/statistics heuristic so callers always get a
    usable score in [0, 1].
    """

    def __init__(self):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # Lightweight GPT-2 output detector. NOTE(review): the canonical hub id
        # is now "openai-community/roberta-base-openai-detector" — confirm the
        # short alias still resolves.
        self.model_name = "roberta-base-openai-detector"
        try:
            # Fix: this checkpoint is a sequence classifier; the previous
            # AutoModelForCausalLM load attached a meaningless LM head.
            from transformers import AutoModelForSequenceClassification

            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForSequenceClassification.from_pretrained(
                self.model_name
            ).to(self.device)
            self.model.eval()
            self.loaded = True
            logger.info(f"AI Detector loaded: {self.model_name}")
        except Exception as e:
            logger.warning(f"AI Detector failed to load: {e}")
            self.loaded = False

    def detect(self, text: str) -> float:
        """Returns probability (0-1) that text is AI-generated."""
        if not self.loaded or not text.strip():
            return self._heuristic_detect(text)

        try:
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            with torch.no_grad():
                logits = self.model(**inputs).logits

            # Softmax over the label dimension of the single example; logits[0]
            # is 1-D, so index once (the old probs[0][1] double-index raised).
            probs = torch.softmax(logits[0], dim=-1)
            # Index 1 is assumed to be the "AI" class — TODO confirm against
            # the model card's id2label mapping.
            ai_prob = probs[1].item() if probs.shape[-1] > 1 else 0.5
            return min(max(ai_prob, 0.0), 1.0)
        except Exception as e:
            logger.error(f"Detection error: {e}")
            return self._heuristic_detect(text)

    def _heuristic_detect(self, text: str) -> float:
        """Fallback heuristic AI detection from surface patterns; 0.5 for blank input."""
        if not text.strip():
            return 0.5

        # Phrases and tics that show up disproportionately in LLM output.
        ai_indicators = [
            r"\b(In conclusion|Furthermore|Moreover|Additionally|It's important to note|Delve|Tapestry|Testament|Landscape|Realm|Harness|Leverage)\b",
            r"\b(very|really|quite|extremely|significantly)\b",
            r"\b(as an AI|language model|I don't have|I cannot)\b",
            r"[.,]{2,}",
            # Fix: "fist" was a typo for "first" in the enumeration pattern.
            r"\b(first|second|third|finally|in summary)\b",
        ]

        sentences = sent_tokenize(text)
        score = 0.0

        if len(sentences) > 0:
            avg_len = sum(len(s.split()) for s in sentences) / len(sentences)
            # AI tends to produce mid-length, very uniform sentences.
            if 15 < avg_len < 25:
                score += 0.2

        # Each pattern hit nudges the score up by 0.1.
        for pattern in ai_indicators:
            matches = len(re.findall(pattern, text, re.IGNORECASE))
            score += matches * 0.1

        # Low variance in word lengths = low "burstiness", an AI tell.
        words = text.split()
        if len(words) > 10:
            word_lengths = [len(w) for w in words]
            if np.var(word_lengths) < 3.0:
                score += 0.15

        return min(max(score, 0.0), 1.0)
240
+
241
+
242
+ # ─────────────────────────────────────────────
243
+ # Readability Analyzer
244
+ # ─────────────────────────────────────────────
245
class ReadabilityAnalyzer:
    """Comprehensive readability analysis.

    Each library metric is computed independently: py-readability-metrics
    raises for unsuitable texts (e.g. fewer than 100 words), so failures are
    swallowed per-metric and the result dict simply omits that key.
    """

    @staticmethod
    def analyze(text: str) -> Dict[str, float]:
        """Return a dict of readability/shape metrics for *text* ({} for blank input)."""
        if not text.strip():
            return {}

        try:
            r = Readability(text)
            results = {}

            try:
                # Fix: the library method is flesch(), not flesch_michaud()
                # (the old name always raised, so this key was never set).
                results["flesch_reading_ease"] = r.flesch().score
            except Exception:
                pass

            try:
                results["flesch_kincaid_grade"] = r.flesch_kincaid().grade_level
            except Exception:
                pass

            try:
                results["gunning_fog"] = r.gunning_fog().grade_level
            except Exception:
                pass

            try:
                results["smog_index"] = r.smog().grade_level
            except Exception:
                pass

            # Simple whitespace/sentence statistics (always available).
            results["word_count"] = len(text.split())
            results["sentence_count"] = len(sent_tokenize(text))
            results["avg_words_per_sentence"] = (
                results["word_count"] / max(results["sentence_count"], 1)
            )
            results["avg_word_length"] = np.mean([len(w) for w in text.split()]) if text.split() else 0

            # Burstiness = std-dev of sentence lengths (variation in rhythm).
            sent_lengths = [len(s.split()) for s in sent_tokenize(text)]
            if len(sent_lengths) > 1:
                results["burstiness"] = np.std(sent_lengths)
                # NOTE(review): not true LM perplexity — a pseudo-metric derived
                # from sentence lengths only; key name kept for compatibility.
                results["perplexity"] = np.exp(
                    -np.mean([np.log(max(l, 1)) for l in sent_lengths])
                )
            else:
                results["burstiness"] = 0
                results["perplexity"] = 1

            return results
        except Exception as e:
            logger.error(f"Readability analysis error: {e}")
            return {"error": str(e)}
304
+
305
+
306
+ # ─────────────────────────────────────────────
307
+ # Similarity Scorer
308
+ # ─────────────────────────────────────────────
309
class SimilarityScorer:
    """Measures semantic similarity between texts."""

    def __init__(self):
        # Embedding model runs on GPU when one is present.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        try:
            self.model = SentenceTransformer(
                "sentence-transformers/all-MiniLM-L6-v2",
                device=self.device,
            )
        except Exception as e:
            logger.warning(f"Similarity scorer failed: {e}")
            self.loaded = False
        else:
            self.loaded = True
            logger.info("Similarity scorer loaded")

    def score(self, text1: str, text2: str) -> float:
        """Cosine similarity of sentence embeddings, clamped to [0, 1].

        Falls back to word-overlap similarity when the embedding model is
        unavailable or encoding fails.
        """
        if not self.loaded:
            return self._simple_similarity(text1, text2)
        try:
            vec_a, vec_b = self.model.encode([text1, text2], convert_to_numpy=True)
            cosine = float(
                np.dot(vec_a, vec_b)
                / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b))
            )
        except Exception as e:
            logger.error(f"Similarity scoring error: {e}")
            return self._simple_similarity(text1, text2)
        return max(0.0, min(1.0, cosine))

    @staticmethod
    def _simple_similarity(t1: str, t2: str) -> float:
        """Jaccard overlap of lowercased word sets (0.0 when either is empty)."""
        set_a = set(t1.lower().split())
        set_b = set(t2.lower().split())
        if not set_a or not set_b:
            return 0.0
        return len(set_a & set_b) / len(set_a | set_b)
346
+
347
+
348
+ # ─────────────────────────────────────────────
349
+ # Prompt Templates
350
+ # ─────────────────────────────────────────────
351
+ PROMPT_TEMPLATES = {
352
+ "casual": {
353
+ "system": """You are an expert at rewriting AI-generated text to sound like it was written by a real, casual human. Your writing has these characteristics:
354
+ - Uses contractions naturally (don't, can't, it's, I'm)
355
+ - Varies sentence length significantly (some very short, some longer)
356
+ - Occasionally starts sentences with "And", "But", "So"
357
+ - Uses colloquial expressions and mild interjections
358
+ - Has natural imperfections β€” not every sentence is grammatically perfect
359
+ - Sounds conversational, like explaining something to a friend
360
+ - Uses specific examples and personal-feeling language
361
+ - Avoids overly formal transitions and academic phrasing
362
+ - Writes with personality and occasional humor
363
+ - Uses rhetorical questions naturally""",
364
+ "user": """Rewrite the following text to sound completely human and casual. Make it sound like a real person wrote it naturally. Preserve the core meaning and information, but completely transform the style.
365
+
366
+ RULES:
367
+ 1. DO NOT use phrases like "In conclusion", "Furthermore", "Moreover", "Additionally", "It's important to note"
368
+ 2. DO NOT use overly formal academic language
369
+ 3. DO NOT make every sentence the same length
370
+ 4. DO use contractions frequently
371
+ 5. DO vary your sentence structure
372
+ 6. DO add natural transitions that humans actually use
373
+ 7. DO make it sound like someone speaking casually but intelligently
374
+
375
+ Original text:
376
+ {text}""",
377
+ },
378
+ "professional": {
379
+ "system": """You are an expert professional writer who makes AI text sound authentically human. Your professional writing:
380
+ - Uses precise, industry-appropriate language without being robotic
381
+ - Varies sentence structure and length naturally
382
+ - Includes subtle personal insights and perspective
383
+ - Uses professional but warm tone
384
+ - Avoids clichΓ© AI phrases and patterns
385
+ - Writes with authority but approachability
386
+ - Uses specific data points and concrete examples
387
+ - Has natural paragraph flow""",
388
+ "user": """Rewrite the following text to sound like it was written by a seasoned professional in the field. Make it sound authentically human while maintaining professionalism.
389
+
390
+ RULES:
391
+ 1. Remove any robotic or template-sounding phrases
392
+ 2. Add subtle professional personality
393
+ 3. Use specific, concrete language
394
+ 4. Vary sentence structure naturally
395
+ 5. Maintain the core information and accuracy
396
+ 6. Sound authoritative but approachable
397
+ 7. Avoid AI-typical transition words
398
+
399
+ Original text:
400
+ {text}""",
401
+ },
402
+ "creative": {
403
+ "system": """You are a creative writer who excels at making text sound deeply human and engaging. Your writing:
404
+ - Uses vivid imagery and sensory details
405
+ - Employs metaphor and analogy naturally
406
+ - Has strong narrative flow
407
+ - Varies rhythm and pacing
408
+ - Shows personality and voice
409
+ - Uses creative sentence structures
410
+ - Includes unexpected but fitting word choices
411
+ - Feels alive and dynamic""",
412
+ "user": """Transform the following text into something that reads like it was written by a talented creative human writer. Make it engaging, vivid, and full of personality while preserving the core message.
413
+
414
+ RULES:
415
+ 1. Add vivid imagery and sensory details where appropriate
416
+ 2. Use metaphor and creative comparisons
417
+ 3. Vary rhythm β€” mix short punchy sentences with longer flowing ones
418
+ 4. Show, don't just tell
419
+ 5. Make it emotionally engaging
420
+ 6. Avoid any AI-sounding clichΓ©s
421
+ 7. Write with unmistakable human voice and style
422
+
423
+ Original text:
424
+ {text}""",
425
+ },
426
+ "academic": {
427
+ "system": """You are an academic writer who makes scholarly text sound authentically human. Your academic writing:
428
+ - Uses precise scholarly language without being mechanical
429
+ - Shows genuine intellectual curiosity
430
+ - Includes nuanced arguments and counterpoints
431
+ - Uses natural academic transitions
432
+ - Varies sentence complexity
433
+ - Shows the author's analytical voice
434
+ - Cites reasoning naturally
435
+ - Avoids formulaic academic AI patterns""",
436
+ "user": """Rewrite the following academic text to sound like it was written by a thoughtful human scholar. Make it sound like genuine intellectual writing, not AI-generated academic prose.
437
+
438
+ RULES:
439
+ 1. Remove formulaic academic AI phrases
440
+ 2. Show genuine analytical thinking
441
+ 3. Use natural scholarly transitions
442
+ 4. Include nuanced perspectives
443
+ 5. Vary sentence complexity naturally
444
+ 6. Sound like a real academic with a distinct voice
445
+ 7. Maintain academic rigor while sounding human
446
+
447
+ Original text:
448
+ {text}""",
449
+ },
450
+ "balanced": {
451
+ "system": """You are an expert at making AI-generated text sound completely human. You analyze the input text and rewrite it with these human characteristics:
452
+ - Natural sentence variation (mix of short, medium, and long sentences)
453
+ - Authentic voice and personality
454
+ - Natural imperfections (occasional fragments, starting with conjunctions)
455
+ - Realistic transitions (not formulaic)
456
+ - Appropriate use of contractions
457
+ - Specific and concrete language instead of vague generalizations
458
+ - Natural paragraph structure
459
+ - Human-like word choice and phrasing
460
+ - Appropriate level of formality based on context""",
461
+ "user": """Rewrite the following text to make it sound 100% human-written. The goal is to preserve all the original information and meaning while completely transforming how it reads β€” it should pass as authentic human writing.
462
+
463
+ RULES:
464
+ 1. NEVER use: "In conclusion", "Furthermore", "Moreover", "Additionally", "It's important to note", "Delve", "Tapestry", "Testament"
465
+ 2. Vary sentence length significantly β€” include some very short sentences
466
+ 3. Use contractions naturally
467
+ 4. Add subtle personality and voice
468
+ 5. Use specific, concrete language
469
+ 6. Start some sentences with "And", "But", "So", "Because"
470
+ 7. Make it read like a smart human wrote it naturally
471
+
472
+ Original text:
473
+ {text}""",
474
+ },
475
+ }
476
+
477
+
478
+ # ─────────────────────────────────────────────
479
+ # Text Analysis Utilities
480
+ # ─────────────────────────────────────────────
481
class TextAnalyzer:
    """Comprehensive text analysis utilities."""

    @staticmethod
    def detect_ai_patterns(text: str) -> List[Dict[str, Any]]:
        """Detect common AI writing patterns and return one finding per hit."""
        findings: List[Dict[str, Any]] = []

        ai_phrases = [
            "in conclusion", "furthermore", "moreover", "additionally",
            "it's important to note", "it is important to note",
            "delve into", "delve deep", "tapestry", "testament to",
            "in the realm of", "in today's world", "in today's digital",
            "ever-evolving", "rapidly evolving", "fast-paced",
            "harness the power", "leverage", "utilize",
            "a testament to", "a rich tapestry", "navigate the landscape",
            "foster a sense", "fosters a deeper", "pivotal role",
            "shed light on", "play a crucial role", "plays a vital role",
            "it's worth noting", "it is worth noting",
            "notably", "crucially", "significantly",
            "in essence", "in summary", "to summarize",
        ]

        # One "ai_phrase" finding per phrase present (case-insensitive).
        lowered = text.lower()
        findings.extend(
            {"type": "ai_phrase", "phrase": phrase, "severity": "medium"}
            for phrase in ai_phrases
            if phrase in lowered
        )

        # Overly uniform sentence lengths are a strong AI tell.
        sentences = sent_tokenize(text)
        if len(sentences) > 3:
            spread = np.std([len(s.split()) for s in sentences])
            if spread < 3:
                findings.append({
                    "type": "uniform_sentences",
                    "detail": f"Low sentence length variation (std={spread:.1f})",
                    "severity": "high",
                })

        # Human prose of any length usually contains contractions.
        n_contractions = len(re.findall(r"\b\w+'\w+\b", text))
        n_words = len(text.split())
        if n_words > 50 and n_contractions < 3:
            findings.append({
                "type": "no_contractions",
                "detail": f"Only {n_contractions} contractions in {n_words} words",
                "severity": "medium",
            })

        # Flag any word that opens more than ~30% of the sentences.
        leads = Counter(
            s.split()[0].lower() if s.split() else "" for s in sentences
        )
        for lead, count in leads.items():
            if count > len(sentences) * 0.3 and len(lead) > 2:
                findings.append({
                    "type": "repetitive_start",
                    "detail": f"'{lead}' starts {count}/{len(sentences)} sentences",
                    "severity": "medium",
                })

        return findings

    @staticmethod
    def get_diff_html(original: str, humanized: str) -> str:
        """Generate HTML diff showing changes (word-level, inline spans)."""
        before = original.split()
        after = humanized.split()

        def removed(words):
            # Red strike-through for text removed from the original.
            return ('<span style="background:#ffcccc;text-decoration:line-through">'
                    + " ".join(words) + "</span>")

        def added(words):
            # Green highlight for text introduced by the rewrite.
            return '<span style="background:#ccffcc">' + " ".join(words) + "</span>"

        pieces = []
        for op, a1, a2, b1, b2 in difflib.SequenceMatcher(None, before, after).get_opcodes():
            if op == "equal":
                pieces.extend(before[a1:a2])
                continue
            if op in ("replace", "delete"):
                pieces.append(removed(before[a1:a2]))
            if op in ("replace", "insert"):
                pieces.append(added(after[b1:b2]))

        return " ".join(pieces)
573
+
574
+
575
+ # ─────────────────────────────────────────────
576
+ # Model Manager
577
+ # ─────────────────────────────────────────────
578
class ModelManager:
    """Manages loading and inference of all models.

    Keeps three parallel registries keyed by MODEL_REGISTRY key:
    ``models`` (raw HF models), ``tokenizers`` and ``pipelines``.
    When local loading fails, ``pipelines[key]`` may instead hold an
    ``InferenceClient`` that proxies to the HF Inference API.
    """

    def __init__(self):
        self.models = {}       # key -> loaded transformers model
        self.pipelines = {}    # key -> transformers pipeline OR InferenceClient
        self.tokenizers = {}   # key -> tokenizer for the local model
        self.loaded = False    # set True after load_models() completes

    def load_models(self, model_keys: Optional[List[str]] = None) -> None:
        """Load specified models into memory.

        Unknown keys are skipped silently; a key that fails to load locally
        falls back to the hosted Inference API. Errors are logged, never
        raised, so a partially-loaded set is possible.
        """
        if model_keys is None:
            # Default trio: one large chat model, one small chat model,
            # one seq2seq paraphraser.
            model_keys = ["llama_3_70b", "mistral_7b", "bart_paraphrase"]

        for key in model_keys:
            if key not in MODEL_REGISTRY:
                continue

            model_info = MODEL_REGISTRY[key]
            try:
                logger.info(f"Loading model: {model_info['name']}...")

                if model_info["type"] == "chat":
                    tokenizer = AutoTokenizer.from_pretrained(
                        model_info["id"],
                        token=HF_TOKEN,
                        trust_remote_code=True,
                    )
                    # Many chat models ship without a pad token; reuse EOS
                    # so batched generation does not crash.
                    if tokenizer.pad_token is None:
                        tokenizer.pad_token = tokenizer.eos_token

                    model = AutoModelForCausalLM.from_pretrained(
                        model_info["id"],
                        torch_dtype=torch.float16,
                        device_map="auto",
                        token=HF_TOKEN,
                        trust_remote_code=True,
                    )

                    pipe = pipeline(
                        "text-generation",
                        model=model,
                        tokenizer=tokenizer,
                        torch_dtype=torch.float16,
                        device_map="auto",
                    )

                    self.models[key] = model
                    self.tokenizers[key] = tokenizer
                    self.pipelines[key] = pipe

                elif model_info["type"] == "seq2seq":
                    tokenizer = AutoTokenizer.from_pretrained(model_info["id"])
                    model = AutoModelForSeq2SeqLM.from_pretrained(
                        model_info["id"],
                        torch_dtype=torch.float16,
                        device_map="auto",
                    )
                    pipe = pipeline(
                        "text2text-generation",
                        model=model,
                        tokenizer=tokenizer,
                        torch_dtype=torch.float16,
                        device_map="auto",
                    )
                    self.models[key] = model
                    self.tokenizers[key] = tokenizer
                    self.pipelines[key] = pipe

                logger.info(f"βœ… Loaded: {model_info['name']}")

            except Exception as e:
                logger.error(f"❌ Failed to load {model_info['name']}: {e}")
                # Try HF Inference API as fallback so the key is still usable.
                try:
                    client = InferenceClient(
                        model=model_info["id"],
                        token=HF_TOKEN,
                    )
                    self.pipelines[key] = client
                    logger.info(f"βœ… Using Inference API for: {model_info['name']}")
                except Exception as e2:
                    logger.error(f"❌ Inference API also failed for {key}: {e2}")

        self.loaded = True
        logger.info(f"Model loading complete. Loaded: {list(self.pipelines.keys())}")

    def generate(
        self,
        model_key: str,
        prompt: str,
        system_prompt: str = "",
        temperature: float = 0.7,
        top_p: float = 0.9,
        max_tokens: int = 2048,
    ) -> str:
        """Generate text using specified model.

        Dispatches on what ``pipelines[model_key]`` actually is: a chat
        text-generation pipeline, a remote InferenceClient, or a seq2seq
        pipeline. Returns "" on any failure (missing model, exception,
        unrecognized result shape) rather than raising.
        """
        if model_key not in self.pipelines:
            logger.error(f"Model {model_key} not loaded")
            return ""

        pipe = self.pipelines[model_key]
        model_info = MODEL_REGISTRY.get(model_key, {})

        try:
            if model_info.get("type") == "chat" or isinstance(pipe, TextGenerationPipeline):
                messages = []
                if system_prompt:
                    messages.append({"role": "system", "content": system_prompt})
                messages.append({"role": "user", "content": prompt})

                result = pipe(
                    messages,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    do_sample=True,
                    return_full_text=False,
                )

                # Pipeline output shape varies by transformers version:
                # either a list of dicts or a single dict, and
                # "generated_text" may itself be a str or a message list.
                if isinstance(result, list):
                    output = result[0]["generated_text"]
                    if isinstance(output, str):
                        return output.strip()
                    elif isinstance(output, list):
                        # Chat-format output: take the last message's content.
                        return output[-1].get("content", "").strip()
                elif isinstance(result, dict):
                    output = result.get("generated_text", "")
                    if isinstance(output, str):
                        return output.strip()

            elif isinstance(pipe, InferenceClient):
                messages = []
                if system_prompt:
                    messages.append({"role": "system", "content": system_prompt})
                messages.append({"role": "user", "content": prompt})

                response = pipe.chat_completion(
                    messages,
                    max_tokens=max_tokens,
                    temperature=temperature,
                )
                return response.choices[0].message.content.strip()

            else:
                # Seq2seq pipeline: single prompt in, single rewrite out.
                # max_length is capped at 1024 tokens-ish (word-count proxy).
                result = pipe(
                    prompt,
                    max_length=min(max_tokens + len(prompt.split()), 1024),
                    temperature=temperature,
                    do_sample=True,
                )
                if isinstance(result, list) and len(result) > 0:
                    return result[0]["generated_text"].strip()

        except Exception as e:
            logger.error(f"Generation error with {model_key}: {e}")
            return ""

        return ""
738
+
739
+
740
+ # ─────────────────────────────────────────────
741
+ # Humanization Engine
742
+ # ─────────────────────────────────────────────
743
class HumanizationEngine:
    """Core humanization engine with multi-model ensemble.

    Composes the model manager, AI detector, readability analyzer,
    similarity scorer and pattern analyzer; ``humanize()`` is the main
    entry point.
    """

    def __init__(self):
        # Components are constructed eagerly, but heavyweight model
        # loading is deferred to the first initialize() call.
        self.model_manager = ModelManager()
        self.ai_detector = AIDetector()
        self.readability = ReadabilityAnalyzer()
        self.similarity = SimilarityScorer()
        self.analyzer = TextAnalyzer()
        self.initialized = False

    def initialize(self) -> None:
        """Initialize all components (idempotent; safe to call per-request)."""
        if self.initialized:
            return

        logger.info("Initializing Humanization Engine...")
        self.model_manager.load_models()
        self.initialized = True
        logger.info("βœ… Engine initialized")
763
+
764
    def humanize(
        self,
        text: str,
        config: PipelineConfig,
    ) -> HumanizationResult:
        """Main humanization pipeline.

        Stages: pre-analysis -> rule-based pre-processing -> primary LLM
        rewrite -> optional ensemble refinement -> rule-based post-processing
        -> post-analysis. Raises RuntimeError when no model is available or
        the primary model returns nothing.
        """
        start_time = time.time()
        strategies = []  # names of every transformation applied, in order

        # Pre-analysis: baseline metrics on the untouched input.
        ai_prob_before = self.ai_detector.detect(text)
        readability_before = self.readability.analyze(text)
        word_count_before = len(text.split())
        # NOTE(review): ai_patterns is computed but never used below —
        # either surface it in the result or drop the call.
        ai_patterns = self.analyzer.detect_ai_patterns(text)

        # Get appropriate prompt template; unknown modes fall back to balanced.
        mode = config.mode
        if mode not in PROMPT_TEMPLATES:
            mode = "balanced"
        template = PROMPT_TEMPLATES[mode]

        # Apply rule-based pre-processing before the LLM sees the text.
        processed_text = text
        if config.remove_patterns:
            processed_text = self._remove_ai_patterns(processed_text, strategies)
        if config.add_imperfections:
            processed_text = self._add_human_imperfections(processed_text, strategies)

        # Primary model generation.
        prompt = template["user"].format(text=processed_text)
        system_prompt = template["system"]

        # Preference order: biggest chat model first, then any loaded pipeline.
        primary_model = "llama_3_70b" if "llama_3_70b" in self.model_manager.pipelines else (
            "gemma_2_27b" if "gemma_2_27b" in self.model_manager.pipelines else
            "mistral_7b" if "mistral_7b" in self.model_manager.pipelines else
            list(self.model_manager.pipelines.keys())[0] if self.model_manager.pipelines else None
        )

        if primary_model is None:
            raise RuntimeError("No models available for humanization")

        humanized_text = self.model_manager.generate(
            model_key=primary_model,
            prompt=prompt,
            system_prompt=system_prompt,
            temperature=config.temperature,
            top_p=config.top_p,
            max_tokens=config.max_tokens,
        )

        if not humanized_text:
            raise RuntimeError("Model returned empty output")

        strategies.append(f"primary_rewrite_{primary_model}")

        # Ensemble: let secondary models refine, keeping a refinement only
        # when it scores lower on the AI detector than the current draft.
        if config.ensemble and len(self.model_manager.pipelines) > 1:
            secondary_models = [
                k for k in self.model_manager.pipelines.keys()
                if k != primary_model
            ][:2]  # Use up to 2 secondary models

            for sec_model in secondary_models:
                # Input is truncated to 3000 chars to stay inside context limits.
                refine_prompt = f"""Take this text and make it sound even MORE human. Add natural flow, vary sentence rhythm, and ensure it reads like authentic human writing. Don't change the meaning.

Text to refine:
{humanized_text[:3000]}"""

                refined = self.model_manager.generate(
                    model_key=sec_model,
                    prompt=refine_prompt,
                    system_prompt="You are an expert editor who makes text sound deeply human. Your edits are subtle but transformative.",
                    temperature=config.temperature * 0.8,  # slightly conservative
                    top_p=config.top_p,
                    max_tokens=config.max_tokens,
                )

                # Guard against degenerate refinements that lost >50% of the text.
                if refined and len(refined) > len(humanized_text) * 0.5:
                    # Compare quality - choose the better one.
                    ai_prob_refined = self.ai_detector.detect(refined)
                    ai_prob_current = self.ai_detector.detect(humanized_text)
                    if ai_prob_refined < ai_prob_current:
                        humanized_text = refined
                        strategies.append(f"ensemble_refined_{sec_model}")
                    else:
                        strategies.append(f"ensemble_attempted_{sec_model}")

        # Rule-based post-processing.
        if config.vary_sentence_length:
            humanized_text = self._vary_sentence_structure(humanized_text, strategies)
        if config.add_transitions:
            humanized_text = self._improve_transitions(humanized_text, strategies)
        if config.add_personal_touch:
            humanized_text = self._add_personal_elements(humanized_text, strategies)

        # Post-analysis on the final text.
        ai_prob_after = self.ai_detector.detect(humanized_text)
        readability_after = self.readability.analyze(humanized_text)
        word_count_after = len(humanized_text.split())
        similarity_score = self.similarity.score(text, humanized_text)
        processing_time = time.time() - start_time

        # Rough change count: words unique to either version.
        orig_words = set(text.lower().split())
        new_words = set(humanized_text.lower().split())
        changes = len(orig_words.symmetric_difference(new_words))

        return HumanizationResult(
            original=text,
            humanized=humanized_text,
            model_used=primary_model,
            mode=mode,
            changes_made=changes,
            similarity_score=similarity_score,
            readability_before=readability_before,
            readability_after=readability_after,
            ai_probability_before=ai_prob_before,
            ai_probability_after=ai_prob_after,
            processing_time=processing_time,
            strategies_applied=strategies,
            word_count_before=word_count_before,
            word_count_after=word_count_after,
            perplexity_before=readability_before.get("perplexity", 0),
            perplexity_after=readability_after.get("perplexity", 0),
        )
889
+
890
+ def _remove_ai_patterns(self, text: str, strategies: List[str]) -> str:
891
+ """Remove common AI writing patterns."""
892
+ replacements = {
893
+ r"\bIn conclusion\b": "So",
894
+ r"\bFurthermore\b": "Plus",
895
+ r"\bMoreover\b": "Also",
896
+ r"\bAdditionally\b": "On top of that",
897
+ r"\bIt's important to note\b": "Keep in mind",
898
+ r"\bIt is important to note\b": "Keep in mind",
899
+ r"\bdelve into\b": "look into",
900
+ r"\bdelve deep\b": "dig into",
901
+ r"\btapestry\b": "mix",
902
+ r"\btestament to\b": "shows",
903
+ r"\bin the realm of\b": "in",
904
+ r"\bin today's world\b": "these days",
905
+ r"\bever-evolving\b": "changing",
906
+ r"\brapidly evolving\b": "fast-changing",
907
+ r"\bharness the power of\b": "use",
908
+ r"\bleverage\b": "use",
909
+ r"\butilize\b": "use",
910
+ r"\bpivotal role\b": "big role",
911
+ r"\bshed light on\b": "explain",
912
+ r"\bfoster a sense of\b": "create",
913
+ r"\bin essence\b": "Basically",
914
+ r"\bin summary\b": "To wrap up",
915
+ }
916
+
917
+ for pattern, replacement in replacements.items():
918
+ if re.search(pattern, text, re.IGNORECASE):
919
+ text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
920
+ strategies.append(f"replaced_ai_pattern_{pattern}")
921
+
922
+ return text
923
+
924
+ def _add_human_imperfections(self, text: str, strategies: List[str]) -> str:
925
+ """Add subtle human imperfections."""
926
+ sentences = sent_tokenize(text)
927
+ if len(sentences) < 2:
928
+ return text
929
+
930
+ # Occasionally start sentences with conjunctions
931
+ conjunctions = ["And", "But", "So", "Because", "Though"]
932
+ for i, sent in enumerate(sentences):
933
+ if i > 0 and random.random() < 0.15:
934
+ conj = random.choice(conjunctions)
935
+ sentences[i] = sent[0].lower() if sent else sent
936
+ sentences[i] = f"{conj} {sentences[i]}"
937
+
938
+ text = " ".join(sentences)
939
+ strategies.append("added_conjunction_starts")
940
+ return text
941
+
942
+ def _vary_sentence_structure(self, text: str, strategies: List[str]) -> str:
943
+ """Vary sentence structure for more natural flow."""
944
+ sentences = sent_tokenize(text)
945
+ if len(sentences) < 3:
946
+ return text
947
+
948
+ new_sentences = []
949
+ for sent in sentences:
950
+ words = sent.split()
951
+ if len(words) > 25 and random.random() < 0.4:
952
+ # Split long sentences
953
+ mid = len(words) // 2
954
+ # Find a good split point
955
+ for i in range(mid - 5, mid + 5):
956
+ if i > 0 and i < len(words) and words[i] in [",", "and", "but", "which", "that", "where", "when"]:
957
+ part1 = " ".join(words[:i + 1])
958
+ part2 = " ".join(words[i + 1:])
959
+ if part2:
960
+ part2 = part2[0].upper() + part2[1:]
961
+ new_sentences.append(part1.strip(" ,"))
962
+ new_sentences.append(part2.strip())
963
+ break
964
+ else:
965
+ new_sentences.append(sent)
966
+ else:
967
+ new_sentences.append(sent)
968
+
969
+ text = " ".join(new_sentences)
970
+ strategies.append("varied_sentence_structure")
971
+ return text
972
+
973
+ def _improve_transitions(self, text: str, strategies: List[str]) -> str:
974
+ """Improve transitions between ideas."""
975
+ human_transitions = [
976
+ "Here's the thing:", "The thing is,", "Look,",
977
+ "Honestly,", "Real talk,", "Here's what I mean:",
978
+ "What this means is:", "Put simply,", "The way I see it,",
979
+ "At the end of the day,", "When you think about it,",
980
+ ]
981
+
982
+ sentences = sent_tokenize(text)
983
+ if len(sentences) < 4:
984
+ return text
985
+
986
+ # Add a transition at ~30% mark
987
+ insert_pos = len(sentences) // 3
988
+ if insert_pos > 0 and insert_pos < len(sentences):
989
+ transition = random.choice(human_transitions)
990
+ sentences[insert_pos] = f"{transition} {sentences[insert_pos][0].lower() + sentences[insert_pos][1:] if sentences[insert_pos] else sentences[insert_pos]}"
991
+
992
+ text = " ".join(sentences)
993
+ strategies.append("improved_transitions")
994
+ return text
995
+
996
+ def _add_personal_elements(self, text: str, strategies: List[str]) -> str:
997
+ """Add personal-feeling elements."""
998
+ personal_phrases = [
999
+ "I've found that", "From my experience,", "I think",
1000
+ "It seems like", "I'd say", "If you ask me,",
1001
+ "In my view,", "What I've noticed is",
1002
+ ]
1003
+
1004
+ sentences = sent_tokenize(text)
1005
+ if len(sentences) < 3:
1006
+ return text
1007
+
1008
+ # Add personal phrase at beginning of second paragraph
1009
+ if len(sentences) > 4:
1010
+ insert_pos = min(4, len(sentences) - 1)
1011
+ phrase = random.choice(personal_phrases)
1012
+ sentences[insert_pos] = f"{phrase} {sentences[insert_pos][0].lower() + sentences[insert_pos][1:] if sentences[insert_pos] else sentences[insert_pos]}"
1013
+
1014
+ text = " ".join(sentences)
1015
+ strategies.append("added_personal_elements")
1016
+ return text
1017
+
1018
    def batch_humanize(
        self,
        texts: List[str],
        config: PipelineConfig,
        progress=gr.Progress(),  # Gradio convention: Progress as default arg
    ) -> List[HumanizationResult]:
        """Process multiple texts.

        Returns one HumanizationResult per input, in order. A text that
        fails is represented by a placeholder result whose ``humanized``
        field carries the error message, so one bad input never aborts
        the whole batch.
        """
        results = []
        for i, text in enumerate(texts):
            progress((i + 1) / len(texts), desc=f"Processing {i + 1}/{len(texts)}")
            try:
                result = self.humanize(text, config)
                results.append(result)
            except Exception as e:
                logger.error(f"Error processing text {i}: {e}")
                # Placeholder result keeps the output aligned with the input.
                results.append(HumanizationResult(
                    original=text,
                    humanized=f"[Error: {str(e)}]",
                    model_used="error",
                    mode=config.mode,
                    changes_made=0,
                    similarity_score=0,
                    readability_before={},
                    readability_after={},
                    ai_probability_before=0,
                    ai_probability_after=0,
                    processing_time=0,
                    strategies_applied=[],
                    word_count_before=len(text.split()),
                    word_count_after=0,
                    perplexity_before=0,
                    perplexity_after=0,
                ))
        return results
1052
+
1053
+
1054
+ # ─────────────────────────────────────────────
1055
+ # Gradio UI Builder
1056
+ # ─────────────────────────────────────────────
1057
class HumanizerApp:
    """Gradio application for the humanizer.

    Owns one HumanizationEngine and the UI theme; ``build_interface()``
    assembles the Blocks app.
    """

    def __init__(self):
        self.engine = HumanizationEngine()
        self.theme = self._build_theme()

    @staticmethod
    def _build_theme():
        """Build custom Gradio theme."""
        # FIX: removed the unused `Base` name from the local import.
        from gradio.themes import Default

        theme = Default(
            primary_hue="emerald",
            secondary_hue="blue",
            font=gr.themes.GoogleFont("Inter"),
        )
        return theme
1075
+
1076
    def build_interface(self) -> gr.Blocks:
        """Build the complete Gradio interface.

        Layout: four tabs (single text, batch, analysis, comparison), a
        shared "Detailed Results" accordion, a tips footer, and all event
        wiring. Returns the assembled Blocks app; the caller launches it.
        """
        with gr.Blocks(
            theme=self.theme,
            title="🧬 Advanced AI Text Humanizer",
            css=self._get_custom_css(),
        ) as app:
            gr.Markdown("""
            # 🧬 Advanced AI Text Humanizer
            ### Transform AI-generated text into authentic human writing using multi-model ensemble

            **Powered by:** Llama 3.3 70B β€’ Mistral 7B β€’ Gemma 2 27B β€’ Zephyr 7B β€’ BART
            """)

            with gr.Tabs():
                # ── Tab 1: Single Text ──
                with gr.Tab("πŸ“ Single Text"):
                    with gr.Row():
                        with gr.Column(scale=1):
                            input_text = gr.Textbox(
                                label="πŸ“„ Input Text",
                                placeholder="Paste your AI-generated text here...",
                                lines=12,
                                max_lines=50,
                            )
                            with gr.Row():
                                humanize_btn = gr.Button(
                                    "✨ Humanize Text",
                                    variant="primary",
                                    size="lg",
                                )
                                clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
                            # Advanced knobs map 1:1 onto PipelineConfig fields.
                            with gr.Accordion("βš™οΈ Advanced Settings", open=False):
                                with gr.Row():
                                    mode = gr.Dropdown(
                                        choices=[
                                            ("🎯 Balanced", "balanced"),
                                            ("😊 Casual", "casual"),
                                            ("πŸ’Ό Professional", "professional"),
                                            ("🎨 Creative", "creative"),
                                            ("πŸ“š Academic", "academic"),
                                        ],
                                        value="balanced",
                                        label="Writing Mode",
                                    )
                                    intensity = gr.Slider(
                                        minimum=0.1,
                                        maximum=1.0,
                                        value=0.5,
                                        step=0.1,
                                        label="Intensity",
                                    )
                                with gr.Row():
                                    temperature = gr.Slider(
                                        minimum=0.1,
                                        maximum=1.5,
                                        value=0.7,
                                        step=0.1,
                                        label="Temperature",
                                    )
                                    top_p = gr.Slider(
                                        minimum=0.1,
                                        maximum=1.0,
                                        value=0.9,
                                        step=0.05,
                                        label="Top-P",
                                    )
                                with gr.Row():
                                    ensemble = gr.Checkbox(
                                        value=True,
                                        label="πŸ”„ Ensemble Mode",
                                    )
                                    preserve_meaning = gr.Checkbox(
                                        value=True,
                                        label="🎯 Preserve Meaning",
                                    )
                                with gr.Row():
                                    add_imperfections = gr.Checkbox(
                                        value=True,
                                        label="✨ Add Imperfections",
                                    )
                                    vary_sentence_length = gr.Checkbox(
                                        value=True,
                                        label="πŸ“ Vary Sentence Length",
                                    )
                                with gr.Row():
                                    add_transitions = gr.Checkbox(
                                        value=True,
                                        label="πŸ”— Add Transitions",
                                    )
                                    add_personal_touch = gr.Checkbox(
                                        value=True,
                                        label="πŸ’­ Add Personal Touch",
                                    )

                        with gr.Column(scale=1):
                            output_text = gr.Textbox(
                                label="πŸ“ Humanized Output",
                                placeholder="Humanized text will appear here...",
                                lines=12,
                                max_lines=50,
                            )
                            with gr.Row():
                                copy_btn = gr.Button("πŸ“‹ Copy", variant="secondary")
                                download_btn = gr.Button("πŸ’Ύ Download", variant="secondary")

                # ── Tab 2: Batch Processing ──
                with gr.Tab("πŸ“¦ Batch Processing"):
                    gr.Markdown("### Process multiple texts at once")
                    batch_input = gr.Textbox(
                        label="πŸ“„ Input Texts (one per line, separated by ---)",
                        placeholder="Text 1...\n---\nText 2...\n---\nText 3...",
                        lines=15,
                        max_lines=100,
                    )
                    batch_btn = gr.Button("πŸš€ Batch Humanize", variant="primary", size="lg")
                    # Column order must match the rows built in _handle_batch.
                    batch_output = gr.Dataframe(
                        headers=["Original", "Humanized", "AI Score Before", "AI Score After", "Similarity"],
                        label="Results",
                    )

                # ── Tab 3: Analysis Dashboard ──
                with gr.Tab("πŸ“Š Analysis"):
                    with gr.Row():
                        with gr.Column():
                            analysis_input = gr.Textbox(
                                label="πŸ“„ Text to Analyze",
                                lines=8,
                            )
                            analyze_btn = gr.Button("πŸ” Analyze", variant="primary")
                        with gr.Column():
                            ai_score_gauge = gr.Number(
                                label="AI Probability Score",
                            )
                            readability_output = gr.JSON(label="Readability Metrics")

                # ── Tab 4: Comparison View ──
                with gr.Tab("πŸ”€ Side-by-Side Comparison"):
                    compare_input = gr.Textbox(
                        label="πŸ“„ Input Text",
                        lines=8,
                    )
                    compare_btn = gr.Button("πŸ” Compare", variant="primary")
                    with gr.Row():
                        compare_original = gr.Textbox(
                            label="Original",
                            lines=12,
                        )
                        compare_humanized = gr.Textbox(
                            label="Humanized",
                            lines=12,
                        )
                    diff_output = gr.HTML(label="πŸ“ Diff View")

            # ── Results Panel (shared) ──
            with gr.Accordion("πŸ“Š Detailed Results", open=True):
                with gr.Row():
                    with gr.Column():
                        stats_json = gr.JSON(label="πŸ“ˆ Processing Statistics")
                    with gr.Column():
                        ai_reduction = gr.Plot(label="πŸ“‰ AI Detection Reduction")
                strategies_output = gr.Textbox(
                    label="πŸ› οΈ Strategies Applied",
                    lines=3,
                )

            # ── Footer ──
            gr.Markdown("""
            ---
            ### πŸ’‘ Tips for Best Results
            - **Balanced mode** works great for most use cases
            - **Higher intensity** = more aggressive rewriting
            - **Ensemble mode** uses multiple models for best quality
            - For short texts (<100 words), try **Casual** or **Creative** mode
            - For long texts (>500 words), use **Professional** or **Academic** mode
            - Adjust **Temperature** for more/less creative output
            """)

            # ── Event Handlers ──
            # Each handler returns a tuple matching its `outputs` list.
            humanize_btn.click(
                fn=self._handle_humanize,
                inputs=[
                    input_text, mode, intensity, temperature, top_p,
                    ensemble, preserve_meaning, add_imperfections,
                    vary_sentence_length, add_transitions, add_personal_touch,
                ],
                outputs=[output_text, stats_json, strategies_output],
            )

            clear_btn.click(
                fn=lambda: ("", "", {}),
                inputs=[],
                outputs=[input_text, output_text, stats_json],
            )

            batch_btn.click(
                fn=self._handle_batch,
                inputs=[batch_input, mode, intensity, temperature, top_p, ensemble],
                outputs=[batch_output],
            )

            analyze_btn.click(
                fn=self._handle_analyze,
                inputs=[analysis_input],
                outputs=[ai_score_gauge, readability_output],
            )

            # NOTE(review): _handle_compare, _copy_text and _download_text are
            # referenced here but defined further down the class — confirm
            # they exist before shipping.
            compare_btn.click(
                fn=self._handle_compare,
                inputs=[compare_input, mode, intensity, temperature, top_p, ensemble],
                outputs=[compare_original, compare_humanized, diff_output, stats_json],
            )

            copy_btn.click(
                fn=self._copy_text,
                inputs=[output_text],
                outputs=[],
            )

            download_btn.click(
                fn=self._download_text,
                inputs=[output_text],
                outputs=[],
            )

        return app
1302
+
1303
    def _build_config(self, mode, intensity, temperature, top_p, ensemble,
                      preserve_meaning, add_imperfections, vary_sentence_length,
                      add_transitions, add_personal_touch) -> PipelineConfig:
        """Build PipelineConfig from UI inputs.

        Every argument maps 1:1 onto a PipelineConfig field except
        max_tokens, which is derived from intensity (range 512–2560 for
        intensity in [0, 1]).
        """
        return PipelineConfig(
            mode=mode,
            intensity=intensity,
            temperature=temperature,
            top_p=top_p,
            ensemble=ensemble,
            preserve_meaning=preserve_meaning,
            add_imperfections=add_imperfections,
            vary_sentence_length=vary_sentence_length,
            add_transitions=add_transitions,
            add_personal_touch=add_personal_touch,
            # Higher intensity buys a larger generation budget.
            max_tokens=int(intensity * 2048) + 512,
        )
1320
+
1321
+ def _handle_humanize(self, text, mode, intensity, temperature, top_p,
1322
+ ensemble, preserve_meaning, add_imperfections,
1323
+ vary_sentence_length, add_transitions, add_personal_touch):
1324
+
1325
+ self.engine.initialize()
1326
+
1327
+ if not text.strip():
1328
+ return "Please enter some text to humanize.", {}, ""
1329
+
1330
+ config = self._build_config(
1331
+ mode, intensity, temperature, top_p, ensemble,
1332
+ preserve_meaning, add_imperfections, vary_sentence_length,
1333
+ add_transitions, add_personal_touch,
1334
+ )
1335
+
1336
+ result = self.engine.humanize(text, config)
1337
+
1338
+ stats = {
1339
+ "πŸ€– Model Used": MODEL_REGISTRY.get(result.model_used, {}).get("name", result.model_used),
1340
+ "πŸ“ Mode": result.mode,
1341
+ "⏱️ Processing Time": f"{result.processing_time:.2f}s",
1342
+ "πŸ“Š Word Count": f"{result.word_count_before} β†’ {result.word_count_after}",
1343
+ "πŸ”„ Changes Made": result.changes_made,
1344
+ "🎯 Semantic Similarity": f"{result.similarity_score:.1%}",
1345
+ "πŸ€– AI Score Before": f"{result.ai_probability_before:.1%}",
1346
+ "πŸ€– AI Score After": f"{result.ai_probability_after:.1%}",
1347
+ "πŸ“‰ AI Reduction": f"{(result.ai_probability_before - result.ai_probability_after):.1%}",
1348
+ "πŸ“ Avg Words/Sentence (Before)": f"{result.readability_before.get('avg_words_per_sentence', 0):.1f}",
1349
+ "πŸ“ Avg Words/Sentence (After)": f"{result.readability_after.get('avg_words_per_sentence', 0):.1f}",
1350
+ "🌊 Burstiness (After)": f"{result.readability_after.get('burstiness', 0):.1f}",
1351
+ }
1352
+
1353
+ strategies = "\n".join(f"βœ… {s}" for s in result.strategies_applied)
1354
+
1355
+ return result.humanized, stats, strategies
1356
+
1357
+ def _handle_batch(self, batch_input, mode, intensity, temperature, top_p, ensemble):
1358
+ self.engine.initialize()
1359
+
1360
+ texts = [t.strip() for t in batch_input.split("---") if t.strip()]
1361
+ if not texts:
1362
+ texts = [line.strip() for line in batch_input.strip().split("\n") if line.strip()]
1363
+
1364
+ if not texts:
1365
+ return [["No input provided"]]
1366
+
1367
+ config = self._build_config(
1368
+ mode, intensity, temperature, top_p, ensemble,
1369
+ True, True, True, True, True,
1370
+ )
1371
+
1372
+ results = self.engine.batch_humanize(texts, config)
1373
+
1374
+ table = []
1375
+ for r in results:
1376
+ table.append([
1377
+ r.original[:200] + "..." if len(r.original) > 200 else r.original,
1378
+ r.humanized[:200] + "..." if len(r.humanized) > 200 else r.humanized,
1379
+ f"{r.ai_probability_before:.1%}",
1380
+ f"{r.ai_probability_after:.1%}",
1381
+ f"{r.similarity_score:.1%}",
1382
+ ])
1383
+
1384
+ return table
1385
+
1386
+ def _handle_analyze(self, text):
1387
+ self.engine.initialize()
1388
+
1389
+ ai_score = self.engine.ai_detector.detect(text)
1390
+ readability = self.engine.readability.analyze(text)
1391
+
1392
+ return ai_score, readability
1393
+
1394
+ def _handle_compare(self, text, mode, intensity, temperature, top_p, ensemble):
1395
+ self.engine.initialize()
1396
+
1397
+ config = self._build_config(
1398
+ mode, intensity, temperature, top_p, ensemble,
1399
+ True, True, True, True, True,
1400
+ )
1401
+
1402
+ result = self.engine.humanize(text, config)
1403
+ diff_html = self.engine.analyzer.get_diff_html(result.original, result.humanized)
1404
+
1405
+ stats = {
1406
+ "πŸ€– Model": MODEL_REGISTRY.get(result.model_used, {}).get("name", ""),
1407
+ "⏱️ Time": f"{result.processing_time:.2f}s",
1408
+ "πŸ“Š Words": f"{result.word_count_before} β†’ {result.word_count_after}",
1409
+ "πŸ€– AI Score": f"{result.ai_probability_before:.1%} β†’ {result.ai_probability_after:.1%}",
1410
+ }
1411
+
1412
+ return result.original, result.humanized, diff_html, stats
1413
+
1414
+ def _copy_text(self, text):
1415
+ """Copy text to clipboard (client-side handled via JS)."""
1416
+ return None
1417
+
1418
+ def _download_text(self, text):
1419
+ """Download text as file."""
1420
+ return None
1421
+
1422
    @staticmethod
    def _get_custom_css():
        """Return the custom CSS injected into the Gradio app.

        Covers overall container width, input/output textarea typography,
        the diff viewer panel (#diff-view), stat cards, and the footer.
        """
        return """
        .gradio-container {
            max-width: 1400px !important;
        }
        .main-text textarea {
            font-size: 15px !important;
            line-height: 1.6 !important;
        }
        #diff-view {
            font-family: 'Inter', sans-serif;
            font-size: 14px;
            line-height: 1.8;
            padding: 20px;
            background: #f8f9fa;
            border-radius: 8px;
        }
        .stat-card {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 15px;
            border-radius: 10px;
            text-align: center;
        }
        .footer {
            text-align: center;
            padding: 20px;
            color: #666;
            font-size: 12px;
        }
        """
1455
+
1456
+
1457
# ─────────────────────────────────────────────
# Launch
# ─────────────────────────────────────────────
1460
def main():
    """Main entry point: build the UI, configure the queue, and launch."""
    logger.info("πŸš€ Starting Advanced AI Text Humanizer...")

    app = HumanizerApp()
    interface = app.build_interface()

    # BUG FIX: `queue` and `default_concurrency_limit` are not valid
    # `Blocks.launch()` keyword arguments in Gradio 4, so passing them via
    # `launch(**launch_kwargs)` raised TypeError at startup. Queue settings
    # belong on `.queue()` of the Blocks object instead.
    interface.queue(default_concurrency_limit=4)

    # Launch configuration
    launch_kwargs = {
        "server_name": "0.0.0.0",
        "server_port": int(os.environ.get("PORT", 7860)),
        "share": False,
        "show_error": True,
        "max_threads": 10,
    }

    # Enable HTTP basic auth only when both credentials are configured.
    username = os.environ.get("GRADIO_USERNAME")
    password = os.environ.get("GRADIO_PASSWORD")
    if username and password:
        launch_kwargs["auth"] = (username, password)

    logger.info("Launching Gradio interface...")
    interface.launch(**launch_kwargs)
1486
+
1487
 
1488
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()