tyfsadik commited on
Commit
1702db7
·
verified ·
1 Parent(s): b2bb7ef

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +425 -0
app.py ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py - Advanced Deep Humanizer for Hugging Face Spaces
2
+ # Optimized for A100/H100 GPUs - Premium Configuration
3
+
4
+ import gradio as gr
5
+ import torch
6
+ import random
7
+ import re
8
+ import json
9
+ from transformers import (
10
+ AutoModelForCausalLM,
11
+ AutoTokenizer,
12
+ pipeline,
13
+ BitsAndBytesConfig
14
+ )
15
+ from typing import List, Dict, Tuple
16
+ import numpy as np
17
+ from dataclasses import dataclass
18
+ import spaces # Hugging Face Spaces utility for GPU management
19
+
20
@dataclass
class HumanizationConfig:
    """Tunable knobs for one humanization run.

    The sampling fields feed ``model.generate``; the remaining flags steer
    prompt construction and post-processing inside ``DeepHumanizer``.
    """
    temperature: float = 0.8            # sampling temperature for generate()
    top_p: float = 0.92                 # nucleus-sampling cutoff
    repetition_penalty: float = 1.15    # discourages verbatim repetition
    max_length: int = 4096              # NOTE(review): defined but never read in this file — confirm intent
    style_intensity: str = "medium"     # light, medium, aggressive
    preserve_meaning: bool = True       # surfaced verbatim in the system prompt
    add_imperfections: bool = True      # enables the "imperfections" prompt section
    burstiness_factor: float = 0.3      # Variation in sentence length
    perplexity_target: float = 25.0     # Human text usually 15-30
32
class DeepHumanizer:
    """Wraps a 4-bit-quantized instruct LLM plus heuristic text analysis to
    rewrite AI-generated text so it reads more like human writing."""

    def __init__(self):
        # Premium model; swapping this id is the only change needed to try an
        # alternative such as "Qwen/Qwen2.5-72B-Instruct" or
        # "deepseek-ai/DeepSeek-V3".
        self.model_id = "meta-llama/Llama-3.3-70B-Instruct"

        self.tokenizer = None
        self.model = None
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        # NOTE(review): loading happens eagerly here, so constructing this
        # class at module import time downloads/loads the full 70B model.
        self.initialize_model()
42
    def initialize_model(self):
        """Initialize 70B model with 4-bit quantization for single A100 80GB.

        Populates ``self.tokenizer`` and ``self.model``; called once from
        ``__init__``. Requires the ``bitsandbytes`` and ``flash-attn``
        packages at runtime — TODO confirm both are in the Space's
        requirements.txt.
        """
        print(f"Initializing {self.model_id} on {self.device}...")

        # 4-bit NF4 quantization so the 70B checkpoint fits on one 80GB card;
        # double quantization shaves extra memory off the quant constants.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_id,
            trust_remote_code=True,
            padding_side="left"  # left padding suits decoder-only generation
        )

        # Some checkpoints ship without a pad token; reuse EOS so batched
        # tokenization and generate() have a valid pad id.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # device_map="auto" lets accelerate shard/place layers automatically.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_id,
            quantization_config=quantization_config,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.float16,
            attn_implementation="flash_attention_2"  # Speed optimization
        )

        self.model.eval()  # inference only; disables dropout
        print("Model loaded successfully")
76
    def calculate_perplexity(self, text: str) -> float:
        """Calculate perplexity of *text* under the loaded model.

        Lower values mean the model finds the text more predictable
        (more AI-like); human prose typically scores higher.
        """
        encodings = self.tokenizer(text, return_tensors="pt")
        # NOTE(review): with device_map="auto" the first layer may live on a
        # different device than self.device — confirm this .to() is correct
        # for multi-GPU placements.
        input_ids = encodings.input_ids.to(self.device)

        with torch.no_grad():
            # Passing the inputs as labels yields mean cross-entropy loss;
            # exp(loss) is the standard perplexity definition.
            outputs = self.model(input_ids, labels=input_ids)
            loss = outputs.loss

        perplexity = torch.exp(loss).item()
        return perplexity
88
+ def analyze_text_patterns(self, text: str) -> Dict:
89
+ """Analyze writing patterns to identify AI characteristics"""
90
+ sentences = re.split(r'(?<=[.!?])\s+', text)
91
+ words = text.split()
92
+
93
+ # Calculate burstiness (variation in sentence length)
94
+ if len(sentences) > 1:
95
+ sent_lengths = [len(s.split()) for s in sentences]
96
+ burstiness = np.std(sent_lengths) / (np.mean(sent_lengths) + 1e-8)
97
+ else:
98
+ burstiness = 0
99
+
100
+ # Common AI patterns
101
+ ai_patterns = [
102
+ r'\b(delve|leverage|utilize|facilitate|optimize)\b',
103
+ r'\b(In conclusion|Furthermore|Moreover|Additionally)\b',
104
+ r'\b(It is important to note that|It should be noted that)\b',
105
+ r'(\b\w+\b)\s+\1', # Repetition
106
+ ]
107
+
108
+ pattern_matches = sum(len(re.findall(p, text, re.I)) for p in ai_patterns)
109
+
110
+ return {
111
+ "burstiness": burstiness,
112
+ "avg_sentence_length": np.mean([len(s.split()) for s in sentences]) if sentences else 0,
113
+ "ai_markers": pattern_matches,
114
+ "formality_score": self._estimate_formality(text)
115
+ }
116
+
117
+ def _estimate_formality(self, text: str) -> float:
118
+ """Estimate formality level 0-1"""
119
+ formal_words = r'\b(therefore|thus|hence|consequently|furthermore|moreover|nevertheless)\b'
120
+ informal_words = r'\b(so|but|anyway|actually|basically|like|you know)\b'
121
+
122
+ formal_count = len(re.findall(formal_words, text, re.I))
123
+ informal_count = len(re.findall(informal_words, text, re.I))
124
+
125
+ total = formal_count + informal_count
126
+ if total == 0:
127
+ return 0.5
128
+ return formal_count / total
129
+
130
    def generate_humanization_prompt(self, text: str, config: HumanizationConfig,
                                     style: str, analysis: Dict) -> str:
        """Generate the system+user prompt from config, style, and analysis.

        Combines a per-style voice instruction, optional imperfection
        guidance, and analysis-driven de-AI-ification into a single
        ChatML-formatted prompt string.

        NOTE(review): the <|im_start|>/<|im_end|> markers are ChatML, but the
        configured model is Llama 3.3 — confirm against the model's actual
        chat template (tokenizer.apply_chat_template would be safer).
        """

        # Optional block asking the model to add human-style imperfections.
        imperfections_guide = ""
        if config.add_imperfections:
            imperfections_guide = """
- Include natural imperfections: occasional fragments, starting sentences with conjunctions (But, And, So)
- Vary punctuation usage naturally (em-dashes, occasional ellipses...)
- Add conversational fillers where appropriate (well, actually, you know what I mean)
- Break formal structure with rhetorical questions or personal asides
"""

        # One voice instruction per UI style choice; unknown styles fall back
        # to "casual" below.
        style_prompts = {
            "casual": "Make it sound like a knowledgeable friend explaining over coffee. Use contractions, everyday vocabulary, personal anecdotes potential.",
            "professional": "Keep it business-appropriate but warm. Like a smart colleague in a Slack message—not too stiff, not too loose.",
            "academic": "Scholarly but accessible. Reduce robotic transitions but keep the rigor. Like a passionate professor speaking, not writing a textbook.",
            "creative": "Vivid, varied sentence structures, rhythmic flow. Occasional metaphors, emotional undertones, unpredictable phrasing.",
            "reddit": "Authentic internet voice. Like a high-karma r/depthhub or r/explainlikeimfive comment. Informative but colloquial.",
            "twitter": "Sharp, punchy, tweet-thread style. Short sentences mixed with longer explanatory ones. Personality-forward."
        }

        style_instruction = style_prompts.get(style, style_prompts["casual"])

        # If the analysis found several AI-marker phrases, explicitly ban them.
        if analysis["ai_markers"] > 3:
            de_ai_instruction = "CRITICAL: Remove all AI-signaling phrases (delve, leverage, moreover, it is important to note). "
        else:
            de_ai_instruction = ""

        # NOTE(review): humanize() later extracts the reply by splitting the
        # decoded output on the literal word "assistant", which depends on
        # this prompt ending with "<|im_start|>assistant".
        prompt = f"""<|im_start|>system
You are an elite linguistic surgeon specializing in humanization of AI-generated text. Your task is to transform robotic, predictable text into authentic human writing that bypasses AI detection through natural variation and cognitive authenticity.

{style_instruction}
{de_ai_instruction}{imperfections_guide}

TECHNICAL REQUIREMENTS:
- Target perplexity: {config.perplexity_target} (human range)
- Burstiness factor: Inject {int(config.burstiness_factor * 100)}% variation in sentence length
- Maintain core meaning: {config.preserve_meaning}
- Output ONLY the rewritten text, no explanations, no markdown code blocks

HUMANIZATION LAYERS:
1. Lexical variation: Replace generic AI terms with context-specific vocabulary
2. Syntactic diversity: Mix simple, compound, complex sentences irregularly
3. Semantic noise: Add slight ambiguity or subjective framing where appropriate
4. Pragmatic markers: Include hesitation, self-correction, natural flow disruptions
5. Cognitive fingerprint: Inject personal stance or mild opinion<|im_end|>
<|im_start|>user
Transform this text into deeply human writing:

{text}<|im_end|>
<|im_start|>assistant"""

        return prompt
186
    @spaces.GPU(duration=120)  # HF Spaces: allocate a GPU for up to 120s per call
    def humanize(self, text: str, style: str = "casual", intensity: str = "medium",
                 creativity: float = 0.8, add_typos: bool = False,
                 target_reading_level: str = "default") -> Tuple[str, Dict]:
        """
        Main humanization pipeline with multi-step refinement.

        Steps: analyze input -> build prompt -> sample a rewrite from the
        model -> intensity-based post-processing -> optional typos ->
        compute before/after metrics. Returns (humanized_text, metrics).

        NOTE(review): target_reading_level is accepted but never used.
        """
        config = HumanizationConfig(
            temperature=creativity,
            style_intensity=intensity,
            add_imperfections=intensity in ["medium", "aggressive"]
        )

        # Step 1: Analysis of the input's AI-ness (burstiness, markers, ...).
        analysis = self.analyze_text_patterns(text)

        # Step 2: Initial rewrite via the LLM.
        prompt = self.generate_humanization_prompt(text, config, style, analysis)

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                # Word-count * 3 is a rough token budget ("generous buffer").
                # NOTE(review): this is 0 for whitespace-only input — confirm
                # callers pre-filter empty text (process_text does).
                max_new_tokens=len(text.split()) * 3,
                temperature=config.temperature,
                top_p=config.top_p,
                repetition_penalty=config.repetition_penalty,
                do_sample=True,
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )

        decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Extract only the assistant's response. NOTE(review): splitting on
        # the literal "assistant" breaks if the word occurs in the text —
        # slicing off the prompt by input token length would be safer.
        humanized = decoded.split("assistant")[-1].strip()

        # Step 3: Post-processing based on intensity.
        if intensity == "aggressive":
            humanized = self._inject_aggressive_variation(humanized)
        elif intensity == "light":
            humanized = self._light_touch(humanized)

        # Step 4: Optional imperfections (typos only in aggressive mode).
        if add_typos and intensity == "aggressive":
            humanized = self._add_natural_typos(humanized)

        # Step 5: Metrics calculation (before/after perplexity & burstiness).
        final_analysis = {
            "original_perplexity": round(self.calculate_perplexity(text), 2),
            "humanized_perplexity": round(self.calculate_perplexity(humanized), 2),
            "burstiness_change": round(self.analyze_text_patterns(humanized)["burstiness"] - analysis["burstiness"], 2),
            "human_score": self._calculate_human_score(humanized),
            "processing_style": style,
            "intensity": intensity
        }

        return humanized, final_analysis
245
+ def _inject_aggressive_variation(self, text: str) -> str:
246
+ """Add high-level human variation"""
247
+ # Randomly combine sentences with conjunctions
248
+ text = re.sub(r'\.\s+([A-Z])', lambda m: f", and {m.group(1).lower()}" if random.random() > 0.7 else f". {m.group(1)}", text)
249
+
250
+ # Add occasional fragments
251
+ sentences = text.split('. ')
252
+ if len(sentences) > 3 and random.random() > 0.5:
253
+ idx = random.randint(1, len(sentences)-2)
254
+ sentences[idx] = sentences[idx].split(',')[0] # Make first part a fragment
255
+ return '. '.join(sentences)
256
+
257
+ def _light_touch(self, text: str) -> str:
258
+ """Minimal changes, just polish"""
259
+ # Remove common AI transitions
260
+ text = re.sub(r'\b(In conclusion|To summarize|Overall),\s*', '', text, flags=re.I)
261
+ return text
262
+
263
+ def _add_natural_typos(self, text: str) -> str:
264
+ """Add believable human typos (use sparingly)"""
265
+ # Very subtle: duplicate letters occasionally
266
+ words = text.split()
267
+ for i in range(len(words)):
268
+ if random.random() > 0.98 and len(words[i]) > 4:
269
+ words[i] = words[i][:2] + words[i][1] + words[i][2:]
270
+ return ' '.join(words)
271
+
272
+ def _calculate_human_score(self, text: str) -> int:
273
+ """Estimate likelihood of passing as human 0-100"""
274
+ score = 70 # Base
275
+
276
+ # Check for AI markers
277
+ ai_markers = len(re.findall(r'\b(leverage|delve|utilize|facilitate|optimize)\b', text, re.I))
278
+ score -= ai_markers * 5
279
+
280
+ # Check variation
281
+ sentences = re.split(r'(?<=[.!?])\s+', text)
282
+ if len(sentences) > 1:
283
+ lengths = [len(s) for s in sentences]
284
+ variation = np.std(lengths) / np.mean(lengths)
285
+ if variation > 0.3: # Good burstiness
286
+ score += 15
287
+
288
+ # Check contractions
289
+ if len(re.findall(r"\b\w+'\w+\b", text)) > 0:
290
+ score += 10
291
+
292
+ return min(100, max(0, score))
293
+
294
# Initialize singleton used by the Gradio callback below.
# NOTE(review): constructing DeepHumanizer loads the full 70B model at
# import time; with @spaces.GPU the heavy work is normally expected inside
# the decorated call — confirm eager loading here is intended.
humanizer = DeepHumanizer()
297
# Gradio Interface
def process_text(text, style, intensity, creativity, add_imperfections, comparison_mode):
    """Gradio callback: run the humanizer and format the three outputs.

    Returns (humanized_text, metrics_markdown, comparison_markdown) — all
    strings, matching the output components (Textbox, Markdown, Markdown).
    """
    if not text.strip():
        # Fixed: previously returned a dict ({}) for the Markdown output;
        # all three outputs must be strings.
        return "", "", ""

    humanized, metrics = humanizer.humanize(
        text=text,
        style=style,
        intensity=intensity,
        creativity=creativity,
        # Fixed: the checkbox sends a bool, so the old comparison
        # (add_imperfections == "Aggressive") was always False and typos
        # could never be enabled.
        add_typos=bool(add_imperfections)
    )

    # Format metrics display. The burstiness delta uses the "+" format flag
    # so a negative change renders as "-0.12" instead of the old "+-0.12".
    metrics_md = f"""
### 📊 Analysis Results

| Metric | Value | Status |
|--------|-------|--------|
| **Human Likelihood Score** | {metrics['human_score']}/100 | {'🟢 Human' if metrics['human_score'] > 80 else '🟡 Unclear' if metrics['human_score'] > 60 else '🔵 AI'} |
| **Perplexity Change** | {metrics['original_perplexity']} → {metrics['humanized_perplexity']} | {'🟢 Good Variation' if metrics['humanized_perplexity'] > metrics['original_perplexity'] else '⚠️ Check needed'} |
| **Burstiness Delta** | {metrics['burstiness_change']:+.2f} | {'🟢 Natural Flow' if metrics['burstiness_change'] > 0 else '⚠️ Monotonous'} |
"""

    if comparison_mode:
        # Original text is truncated to 500 chars to keep the panel compact.
        comparison = f"""
**Original ({len(text.split())} words):**
{text[:500]}{'...' if len(text) > 500 else ''}

---
**Humanized ({len(humanized.split())} words):**
{humanized}
"""
        return humanized, metrics_md, comparison

    return humanized, metrics_md, ""
334
# Custom CSS injected into gr.Blocks for a premium feel.
# NOTE(review): .metric-card is defined but no component in this file
# assigns that class — confirm it is still needed.
css = """
.gradio-container {
font-family: 'Inter', sans-serif;
}
.metric-card {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 8px;
padding: 16px;
color: white;
}
"""
347
# UI definition. Two-column layout: inputs/controls on the left, outputs on
# the right; components are wired to process_text via submit_btn.click below.
with gr.Blocks(css=css, title="Deep Humanizer Pro", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🧠 Deep Humanizer Pro
### Advanced AI-to-Human Text Transformation using Llama 3.3 70B
*Elite-grade humanization with linguistic analysis and adversarial pattern disruption*
""")

    with gr.Row():
        with gr.Column(scale=1):
            # Left column: source text plus all tuning controls.
            input_text = gr.Textbox(
                label="Input Text (AI-generated)",
                placeholder="Paste your AI-generated content here...",
                lines=10
            )

            with gr.Row():
                style = gr.Dropdown(
                    choices=["casual", "professional", "academic", "creative", "reddit", "twitter"],
                    value="casual",
                    label="Voice Style"
                )
                intensity = gr.Radio(
                    choices=["light", "medium", "aggressive"],
                    value="medium",
                    label="Humanization Intensity"
                )

            with gr.Row():
                # Slider feeds HumanizationConfig.temperature directly.
                creativity = gr.Slider(
                    minimum=0.1, maximum=1.0, value=0.8, step=0.1,
                    label="Creativity (Temperature)"
                )
                add_imperfections = gr.Checkbox(
                    label="Add Natural Imperfections",
                    value=True
                )

            comparison_mode = gr.Checkbox(
                label="Show Side-by-Side Comparison",
                value=False
            )

            submit_btn = gr.Button("🚀 Humanize Text", variant="primary")

        with gr.Column(scale=1):
            # Right column: humanized output plus metric/comparison panels.
            output_text = gr.Textbox(
                label="Humanized Output",
                lines=10,
                show_copy_button=True
            )
            metrics_display = gr.Markdown()
            comparison_display = gr.Markdown()

    # Clickable example rows fill (input_text, style, intensity).
    gr.Examples(
        examples=[
            ["Artificial Intelligence (AI) refers to the simulation of human intelligence in machines that are programmed to think like humans and mimic their actions. The term may also be applied to any machine that exhibits traits associated with a human mind such as learning and problem-solving.", "casual", "medium"],
            ["In conclusion, it is important to note that leveraging cutting-edge technologies can facilitate optimal outcomes for stakeholders.", "professional", "aggressive"],
        ],
        inputs=[input_text, style, intensity],
        label="Try these examples"
    )

    # Wire the button: six inputs map positionally onto process_text's params.
    submit_btn.click(
        fn=process_text,
        inputs=[input_text, style, intensity, creativity, add_imperfections, comparison_mode],
        outputs=[output_text, metrics_display, comparison_display]
    )

    gr.Markdown("""
### 🛠️ Technical Specifications
- **Model**: Llama 3.3 70B Instruct (4-bit quantized)
- **Architecture**: Flash Attention 2 + Gradient Checkpointing
- **Analysis**: Perplexity scoring, burstiness calculation, AI marker detection
- **GPU**: Optimized for A100/H100 (80GB VRAM)
""")
424
# Launch the UI only when run directly as a script.
if __name__ == "__main__":
    demo.launch()