tyfsadik committed on
Commit
fd7f556
·
verified ·
1 Parent(s): b90824c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +207 -1088
app.py CHANGED
@@ -1,1112 +1,231 @@
1
  import os
2
  import gradio as gr
3
- import random
4
- import re
 
5
  import nltk
6
- import numpy as np
7
- import torch
8
- from collections import defaultdict, Counter
 
 
9
  import string
10
- import math
11
- from typing import List, Dict, Tuple, Optional
12
-
13
- # Core NLP imports with fallback handling
14
- try:
15
- import spacy
16
- SPACY_AVAILABLE = True
17
- except ImportError:
18
- SPACY_AVAILABLE = False
19
-
20
- try:
21
- from transformers import (
22
- AutoTokenizer, AutoModelForSequenceClassification,
23
- T5Tokenizer, T5ForConditionalGeneration,
24
- pipeline, BertTokenizer, BertModel
25
- )
26
- TRANSFORMERS_AVAILABLE = True
27
- except ImportError:
28
- TRANSFORMERS_AVAILABLE = False
29
-
30
- try:
31
- from sentence_transformers import SentenceTransformer
32
- SENTENCE_TRANSFORMERS_AVAILABLE = True
33
- except ImportError:
34
- SENTENCE_TRANSFORMERS_AVAILABLE = False
35
-
36
- try:
37
- from textblob import TextBlob
38
- TEXTBLOB_AVAILABLE = True
39
- except ImportError:
40
- TEXTBLOB_AVAILABLE = False
41
-
42
- try:
43
- from sklearn.metrics.pairwise import cosine_similarity
44
- SKLEARN_AVAILABLE = True
45
- except ImportError:
46
- SKLEARN_AVAILABLE = False
47
-
48
- from textstat import flesch_reading_ease, flesch_kincaid_grade
49
- from nltk.tokenize import sent_tokenize, word_tokenize
50
- from nltk.corpus import wordnet, stopwords
51
- from nltk.tag import pos_tag
52
-
53
- # Setup environment
54
- os.environ['NLTK_DATA'] = '/tmp/nltk_data'
55
- os.environ['TOKENIZERS_PARALLELISM'] = 'false'
56
-
57
def download_dependencies():
    """Fetch the NLTK corpora/models the app needs into /tmp/nltk_data.

    Failures are logged, never raised: a missing corpus degrades features
    but must not prevent the app from starting.
    """
    try:
        os.makedirs('/tmp/nltk_data', exist_ok=True)
        nltk.data.path.append('/tmp/nltk_data')

        packages = ('punkt', 'punkt_tab', 'averaged_perceptron_tagger',
                    'stopwords', 'wordnet', 'omw-1.4', 'vader_lexicon')
        for package in packages:
            try:
                nltk.download(package, download_dir='/tmp/nltk_data', quiet=True)
            except Exception as e:
                print(f"Failed to download {package}: {e}")

        print("✅ NLTK dependencies loaded")

    except Exception as e:
        print(f"❌ Dependency setup error: {e}")

download_dependencies()
79
-
80
- class AdvancedAIHumanizer:
81
    def __init__(self):
        """Assemble the humanizer: models, rewrite patterns, lexical
        resources, and static fallback synonym tables (in that order)."""
        self.setup_models()                 # optional heavy NLP models (may be None)
        self.setup_humanization_patterns()  # regex rewrite tables
        self.load_linguistic_resources()    # stopwords, fillers, transitions
        self.setup_fallback_embeddings()    # curated synonym groups
86
-
87
- def setup_models(self):
88
- """Initialize advanced NLP models with fallback handling"""
89
- try:
90
- print("🔄 Loading advanced models...")
91
-
92
- # Sentence transformer for semantic similarity
93
- if SENTENCE_TRANSFORMERS_AVAILABLE:
94
- try:
95
- self.sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
96
- print("✅ Sentence transformer loaded")
97
- except:
98
- self.sentence_model = None
99
- print("⚠️ Sentence transformer not available")
100
- else:
101
- self.sentence_model = None
102
- print("⚠️ sentence-transformers not installed")
103
-
104
- # Paraphrasing model
105
- if TRANSFORMERS_AVAILABLE:
106
- try:
107
- self.paraphrase_tokenizer = T5Tokenizer.from_pretrained('t5-small')
108
- self.paraphrase_model = T5ForConditionalGeneration.from_pretrained('t5-small')
109
- print("✅ T5 paraphrasing model loaded")
110
- except:
111
- self.paraphrase_tokenizer = None
112
- self.paraphrase_model = None
113
- print("⚠️ T5 paraphrasing model not available")
114
- else:
115
- self.paraphrase_tokenizer = None
116
- self.paraphrase_model = None
117
- print("⚠️ transformers not installed")
118
-
119
- # SpaCy model
120
- if SPACY_AVAILABLE:
121
- try:
122
- self.nlp = spacy.load("en_core_web_sm")
123
- print("✅ SpaCy model loaded")
124
- except:
125
- try:
126
- os.system("python -m spacy download en_core_web_sm")
127
- self.nlp = spacy.load("en_core_web_sm")
128
- print("✅ SpaCy model downloaded and loaded")
129
- except:
130
- self.nlp = None
131
- print("⚠️ SpaCy model not available")
132
- else:
133
- self.nlp = None
134
- print("⚠️ spaCy not installed")
135
-
136
- except Exception as e:
137
- print(f"❌ Model setup error: {e}")
138
-
139
- def setup_fallback_embeddings(self):
140
- """Setup fallback word similarity using simple patterns"""
141
- # Common word groups for similarity
142
- self.word_groups = {
143
- 'analyze': ['examine', 'study', 'investigate', 'explore', 'review', 'assess'],
144
- 'important': ['crucial', 'vital', 'significant', 'essential', 'key', 'critical'],
145
- 'shows': ['demonstrates', 'reveals', 'indicates', 'displays', 'exhibits'],
146
- 'understand': ['comprehend', 'grasp', 'realize', 'recognize', 'appreciate'],
147
- 'develop': ['create', 'build', 'establish', 'form', 'generate', 'produce'],
148
- 'improve': ['enhance', 'better', 'upgrade', 'refine', 'advance', 'boost'],
149
- 'consider': ['think about', 'examine', 'evaluate', 'contemplate', 'ponder'],
150
- 'different': ['various', 'diverse', 'distinct', 'separate', 'alternative'],
151
- 'effective': ['successful', 'efficient', 'productive', 'powerful', 'useful'],
152
- 'significant': ['important', 'substantial', 'considerable', 'notable', 'major'],
153
- 'implement': ['apply', 'execute', 'carry out', 'put into practice', 'deploy'],
154
- 'utilize': ['use', 'employ', 'apply', 'harness', 'leverage', 'exploit'],
155
- 'comprehensive': ['complete', 'thorough', 'extensive', 'detailed', 'full'],
156
- 'fundamental': ['basic', 'essential', 'core', 'primary', 'key', 'central'],
157
- 'substantial': ['significant', 'considerable', 'large', 'major', 'extensive']
158
- }
159
-
160
- # Reverse mapping for quick lookup
161
- self.synonym_map = {}
162
- for base_word, synonyms in self.word_groups.items():
163
- for synonym in synonyms:
164
- if synonym not in self.synonym_map:
165
- self.synonym_map[synonym] = []
166
- self.synonym_map[synonym].extend([base_word] + [s for s in synonyms if s != synonym])
167
-
168
    def setup_humanization_patterns(self):
        """Build the regex-driven rewrite tables.

        ``ai_indicators``: regex -> list of natural replacements for phrases
        commonly flagged as AI-generated (used by replace_ai_patterns).
        ``human_starters``: sentence openers injected by add_human_touches.
        ``contractions``: regex -> contraction applied by
        apply_advanced_contractions.
        """

        # Expanded AI-flagged terms with more variations
        self.ai_indicators = {
            # Academic/Formal terms
            r'\bdelve into\b': ["explore", "examine", "investigate", "look into", "study", "dig into", "analyze"],
            r'\bembark upon?\b': ["begin", "start", "initiate", "launch", "set out", "commence", "kick off"],
            r'\ba testament to\b': ["proof of", "evidence of", "shows", "demonstrates", "reflects", "indicates"],
            r'\blandscape of\b': ["world of", "field of", "area of", "context of", "environment of", "space of"],
            r'\bnavigating\b': ["handling", "managing", "dealing with", "working through", "tackling", "addressing"],
            r'\bmeticulous\b': ["careful", "thorough", "detailed", "precise", "systematic", "methodical"],
            r'\bintricate\b': ["complex", "detailed", "sophisticated", "elaborate", "complicated", "involved"],
            r'\bmyriad\b': ["many", "numerous", "countless", "various", "multiple", "lots of"],
            r'\bplethora\b': ["abundance", "wealth", "variety", "range", "loads", "tons"],
            r'\bparadigm\b': ["model", "framework", "approach", "system", "way", "method"],
            r'\bsynergy\b': ["teamwork", "cooperation", "collaboration", "working together", "unity"],
            r'\bleverage\b': ["use", "utilize", "employ", "apply", "tap into", "make use of"],
            r'\bfacilitate\b': ["help", "assist", "enable", "support", "aid", "make easier"],
            r'\boptimize\b': ["improve", "enhance", "refine", "perfect", "boost", "maximize"],
            r'\bstreamline\b': ["simplify", "improve", "refine", "smooth out", "make efficient"],
            r'\brobust\b': ["strong", "reliable", "solid", "sturdy", "effective", "powerful"],
            r'\bseamless\b': ["smooth", "fluid", "effortless", "easy", "integrated", "unified"],
            r'\binnovative\b': ["creative", "original", "new", "fresh", "groundbreaking", "inventive"],
            r'\bcutting-edge\b': ["advanced", "modern", "latest", "new", "state-of-the-art", "leading"],
            r'\bstate-of-the-art\b': ["advanced", "modern", "latest", "top-notch", "cutting-edge"],

            # Transition phrases - more natural alternatives
            r'\bfurthermore\b': ["also", "plus", "what's more", "on top of that", "besides", "additionally"],
            r'\bmoreover\b': ["also", "plus", "what's more", "on top of that", "besides", "furthermore"],
            r'\bhowever\b': ["but", "yet", "though", "still", "although", "that said"],
            r'\bnevertheless\b': ["still", "yet", "even so", "but", "however", "all the same"],
            r'\btherefore\b': ["so", "thus", "that's why", "as a result", "because of this", "for this reason"],
            r'\bconsequently\b': ["so", "therefore", "as a result", "because of this", "thus", "that's why"],
            r'\bin conclusion\b': ["finally", "to wrap up", "in the end", "ultimately", "lastly", "to finish"],
            r'\bto summarize\b': ["in short", "briefly", "to sum up", "basically", "in essence", "overall"],
            r'\bin summary\b': ["briefly", "in short", "basically", "to sum up", "overall", "in essence"],

            # Academic connectors - more casual
            r'\bin order to\b': ["to", "so I can", "so we can", "with the goal of", "aiming to"],
            r'\bdue to the fact that\b': ["because", "since", "as", "given that", "seeing that"],
            r'\bfor the purpose of\b': ["to", "in order to", "for", "aiming to", "with the goal of"],
            r'\bwith regard to\b': ["about", "concerning", "regarding", "when it comes to", "as for"],
            r'\bin terms of\b': ["regarding", "when it comes to", "as for", "concerning", "about"],
            r'\bby means of\b': ["through", "using", "via", "by way of", "with"],
            r'\bas a result of\b': ["because of", "due to", "from", "owing to", "thanks to"],
            r'\bin the event that\b': ["if", "should", "in case", "when", "if it happens that"],
            r'\bprior to\b': ["before", "ahead of", "earlier than", "in advance of"],
            r'\bsubsequent to\b': ["after", "following", "later than", "once"],

            # Additional formal patterns
            r'\bcomprehensive\b': ["complete", "thorough", "detailed", "full", "extensive", "in-depth"],
            r'\bfundamental\b': ["basic", "essential", "core", "key", "primary", "main"],
            r'\bsubstantial\b': ["significant", "considerable", "large", "major", "big", "huge"],
            r'\bsignificant\b': ["important", "major", "considerable", "substantial", "notable", "big"],
            r'\bimplement\b': ["put in place", "carry out", "apply", "execute", "use", "deploy"],
            r'\butilize\b': ["use", "employ", "apply", "make use of", "tap into", "leverage"],
            r'\bdemonstrate\b': ["show", "prove", "illustrate", "reveal", "display", "exhibit"],
            r'\bestablish\b': ["set up", "create", "build", "form", "start", "found"],
            r'\bmaintain\b': ["keep", "preserve", "sustain", "continue", "uphold", "retain"],
            r'\bobtain\b': ["get", "acquire", "gain", "secure", "achieve", "attain"],
        }

        # More natural sentence starters
        self.human_starters = [
            "Actually,", "Honestly,", "Basically,", "Really,", "Generally,", "Usually,",
            "Often,", "Sometimes,", "Clearly,", "Obviously,", "Naturally,", "Certainly,",
            "Definitely,", "Interestingly,", "Surprisingly,", "Notably,", "Importantly,",
            "What's more,", "Plus,", "Also,", "Besides,", "On top of that,", "In fact,",
            "Indeed,", "Of course,", "No doubt,", "Without question,", "Frankly,",
            "To be honest,", "Truth is,", "The thing is,", "Here's the deal,", "Look,"
        ]

        # Professional but natural contractions
        self.contractions = {
            r'\bit is\b': "it's", r'\bthat is\b': "that's", r'\bthere is\b': "there's",
            r'\bwho is\b': "who's", r'\bwhat is\b': "what's", r'\bwhere is\b': "where's",
            r'\bthey are\b': "they're", r'\bwe are\b': "we're", r'\byou are\b': "you're",
            r'\bI am\b': "I'm", r'\bhe is\b': "he's", r'\bshe is\b': "she's",
            r'\bcannot\b': "can't", r'\bdo not\b': "don't", r'\bdoes not\b': "doesn't",
            r'\bwill not\b': "won't", r'\bwould not\b': "wouldn't", r'\bshould not\b': "shouldn't",
            r'\bcould not\b': "couldn't", r'\bhave not\b': "haven't", r'\bhas not\b': "hasn't",
            r'\bhad not\b': "hadn't", r'\bis not\b': "isn't", r'\bare not\b': "aren't",
            r'\bwas not\b': "wasn't", r'\bwere not\b': "weren't", r'\blet us\b': "let's",
            r'\bI will\b': "I'll", r'\byou will\b': "you'll", r'\bwe will\b': "we'll",
            r'\bthey will\b': "they'll", r'\bI would\b': "I'd", r'\byou would\b': "you'd"
        }
255
-
256
    def load_linguistic_resources(self):
        """Load stopwords plus the filler/transition phrase banks used to
        make output read more conversationally.

        Any failure (e.g. missing NLTK stopwords corpus) is logged and the
        attributes are left unset.
        """
        try:
            # Stop words (requires the NLTK 'stopwords' corpus)
            self.stop_words = set(stopwords.words('english'))

            # Common filler words and phrases for natural flow
            self.fillers = [
                "you know", "I mean", "sort of", "kind of", "basically", "actually",
                "really", "quite", "pretty much", "more or less", "essentially"
            ]

            # Natural transition phrases
            self.natural_transitions = [
                "And here's the thing:", "But here's what's interesting:", "Now, here's where it gets good:",
                "So, what does this mean?", "Here's why this matters:", "Think about it this way:",
                "Let me put it this way:", "Here's the bottom line:", "The reality is:",
                "What we're seeing is:", "The truth is:", "At the end of the day:"
            ]

            print("✅ Linguistic resources loaded")

        except Exception as e:
            print(f"❌ Linguistic resource error: {e}")
280
 
281
- def calculate_perplexity(self, text: str) -> float:
282
- """Calculate text perplexity to measure predictability"""
283
- try:
284
- words = word_tokenize(text.lower())
285
- if len(words) < 2:
286
- return 50.0
287
-
288
- word_freq = Counter(words)
289
- total_words = len(words)
290
-
291
- # Calculate entropy
292
- entropy = 0
293
- for word in words:
294
- prob = word_freq[word] / total_words
295
- if prob > 0:
296
- entropy -= prob * math.log2(prob)
297
-
298
- perplexity = 2 ** entropy
299
-
300
- # Normalize to human-like range (40-80)
301
- if perplexity < 20:
302
- perplexity += random.uniform(20, 30)
303
- elif perplexity > 100:
304
- perplexity = random.uniform(60, 80)
305
-
306
- return perplexity
307
-
308
- except:
309
- return random.uniform(45, 75) # Human-like default
310
 
311
- def calculate_burstiness(self, text: str) -> float:
312
- """Calculate burstiness (variation in sentence length)"""
313
- try:
314
- sentences = sent_tokenize(text)
315
- if len(sentences) < 2:
316
- return 1.2
317
-
318
- lengths = [len(word_tokenize(sent)) for sent in sentences]
319
-
320
- if len(lengths) < 2:
321
- return 1.2
322
-
323
- mean_length = np.mean(lengths)
324
- variance = np.var(lengths)
325
-
326
- if mean_length == 0:
327
- return 1.2
328
-
329
- burstiness = variance / mean_length
330
-
331
- # Ensure human-like burstiness (>0.5)
332
- if burstiness < 0.5:
333
- burstiness = random.uniform(0.7, 1.5)
334
-
335
- return burstiness
336
-
337
- except:
338
- return random.uniform(0.8, 1.4) # Human-like default
339
 
340
- def get_semantic_similarity(self, text1: str, text2: str) -> float:
341
- """Calculate semantic similarity between texts"""
342
- try:
343
- if self.sentence_model and SKLEARN_AVAILABLE:
344
- embeddings = self.sentence_model.encode([text1, text2])
345
- similarity = cosine_similarity([embeddings[0]], [embeddings[1]])[0][0]
346
- return float(similarity)
347
- else:
348
- # Fallback: simple word overlap similarity
349
- words1 = set(word_tokenize(text1.lower()))
350
- words2 = set(word_tokenize(text2.lower()))
351
-
352
- if not words1 or not words2:
353
- return 0.8
354
-
355
- intersection = len(words1.intersection(words2))
356
- union = len(words1.union(words2))
357
-
358
- if union == 0:
359
- return 0.8
360
-
361
- jaccard_sim = intersection / union
362
- return max(0.7, jaccard_sim) # Minimum baseline
363
-
364
- except Exception as e:
365
- print(f"Similarity calculation error: {e}")
366
- return 0.8
367
 
368
- def advanced_paraphrase(self, text: str, max_length: int = 256) -> str:
369
- """Advanced paraphrasing using T5 or fallback methods"""
370
- try:
371
- if self.paraphrase_model and self.paraphrase_tokenizer:
372
- # Use T5 for paraphrasing
373
- input_text = f"paraphrase: {text}"
374
- inputs = self.paraphrase_tokenizer.encode(
375
- input_text,
376
- return_tensors='pt',
377
- max_length=max_length,
378
- truncation=True
379
- )
380
-
381
- with torch.no_grad():
382
- outputs = self.paraphrase_model.generate(
383
- inputs,
384
- max_length=max_length,
385
- num_return_sequences=1,
386
- temperature=0.8,
387
- do_sample=True,
388
- top_p=0.9,
389
- repetition_penalty=1.1
390
- )
391
-
392
- paraphrased = self.paraphrase_tokenizer.decode(outputs[0], skip_special_tokens=True)
393
-
394
- # Check semantic similarity
395
- similarity = self.get_semantic_similarity(text, paraphrased)
396
- if similarity > 0.7:
397
- return paraphrased
398
-
399
- # Fallback: manual paraphrasing
400
- return self.manual_paraphrase(text)
401
-
402
- except Exception as e:
403
- print(f"Paraphrase error: {e}")
404
- return self.manual_paraphrase(text)
405
 
406
- def manual_paraphrase(self, text: str) -> str:
407
- """Manual paraphrasing as fallback"""
408
- # Simple restructuring patterns
409
- patterns = [
410
- # Active to passive hints
411
- (r'(\w+) shows that (.+)', r'It is shown by \1 that \2'),
412
- (r'(\w+) demonstrates (.+)', r'This demonstrates \2 through \1'),
413
- (r'We can see that (.+)', r'It becomes clear that \1'),
414
- (r'This indicates (.+)', r'What this shows is \1'),
415
- (r'Research shows (.+)', r'Studies reveal \1'),
416
- (r'It is important to note (.+)', r'Worth noting is \1'),
417
- ]
418
-
419
- result = text
420
- for pattern, replacement in patterns:
421
- if re.search(pattern, result, re.IGNORECASE):
422
- result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)
423
- break
424
-
425
- return result
426
 
427
- def get_contextual_synonym(self, word: str, context: str = "") -> str:
428
- """Get contextually appropriate synonym with fallback"""
429
- try:
430
- # First try the predefined word groups
431
- word_lower = word.lower()
432
-
433
- if word_lower in self.word_groups:
434
- synonyms = self.word_groups[word_lower]
435
- return random.choice(synonyms)
436
-
437
- if word_lower in self.synonym_map:
438
- synonyms = self.synonym_map[word_lower]
439
- return random.choice(synonyms)
440
-
441
- # Fallback to WordNet
442
- synsets = wordnet.synsets(word.lower())
443
- if synsets:
444
- synonyms = []
445
- for synset in synsets[:2]:
446
- for lemma in synset.lemmas():
447
- synonym = lemma.name().replace('_', ' ')
448
- if synonym != word.lower() and len(synonym) > 2:
449
- synonyms.append(synonym)
450
-
451
- if synonyms:
452
- # Prefer synonyms with similar length
453
- suitable = [s for s in synonyms if abs(len(s) - len(word)) <= 3]
454
- if suitable:
455
- return random.choice(suitable[:3])
456
- return random.choice(synonyms[:3])
457
-
458
- return word
459
-
460
- except:
461
  return word
462
-
463
- def advanced_sentence_restructure(self, sentence: str) -> str:
464
- """Advanced sentence restructuring"""
465
- try:
466
- # Multiple restructuring strategies
467
- strategies = [
468
- self.move_adverb_clause,
469
- self.split_compound_sentence,
470
- self.vary_voice_advanced,
471
- self.add_casual_connector,
472
- self.restructure_with_emphasis
473
- ]
474
-
475
- strategy = random.choice(strategies)
476
- result = strategy(sentence)
477
-
478
- # Ensure we didn't break the sentence
479
- if len(result.split()) < 3 or not result.strip():
480
- return sentence
481
-
482
- return result
483
-
484
- except:
485
- return sentence
486
-
487
- def move_adverb_clause(self, sentence: str) -> str:
488
- """Move adverbial clauses for variation"""
489
- patterns = [
490
- (r'^(.*?),\s*(because|since|when|if|although|while|as)\s+(.*?)([.!?])$',
491
- r'\2 \3, \1\4'),
492
- (r'^(.*?)\s+(because|since|when|if|although|while|as)\s+(.*?)([.!?])$',
493
- r'\2 \3, \1\4'),
494
- (r'^(Although|While|Since|Because|When|If)\s+(.*?),\s*(.*?)([.!?])$',
495
- r'\3, \1 \2\4')
496
- ]
497
-
498
- for pattern, replacement in patterns:
499
- if re.search(pattern, sentence, re.IGNORECASE):
500
- result = re.sub(pattern, replacement, sentence, flags=re.IGNORECASE)
501
- if result != sentence and len(result.split()) >= 3:
502
- return result.strip()
503
-
504
- return sentence
505
-
506
- def split_compound_sentence(self, sentence: str) -> str:
507
- """Split overly long compound sentences"""
508
- conjunctions = [', and ', ', but ', ', so ', ', yet ', ', or ', '; however,', '; moreover,']
509
 
510
- for conj in conjunctions:
511
- if conj in sentence and len(sentence.split()) > 15:
512
- parts = sentence.split(conj, 1)
513
- if len(parts) == 2:
514
- first = parts[0].strip()
515
- second = parts[1].strip()
516
-
517
- # Ensure both parts are substantial
518
- if len(first.split()) > 3 and len(second.split()) > 3:
519
- # Add period to first part if needed
520
- if not first.endswith(('.', '!', '?')):
521
- first += '.'
522
-
523
- # Capitalize second part
524
- if second and second[0].islower():
525
- second = second[0].upper() + second[1:]
526
-
527
- # Add natural connector
528
- connectors = ["Also,", "Plus,", "Additionally,", "What's more,", "On top of that,"]
529
- connector = random.choice(connectors)
530
-
531
- return f"{first} {connector} {second.lower()}"
532
-
533
- return sentence
534
-
535
- def vary_voice_advanced(self, sentence: str) -> str:
536
- """Advanced voice variation"""
537
- # Passive to active patterns
538
- passive_patterns = [
539
- (r'(\w+)\s+(?:is|are|was|were)\s+(\w+ed|shown|seen|made|used|done|taken|given|found)\s+by\s+(.+)',
540
- r'\3 \2 \1'),
541
- (r'(\w+)\s+(?:has|have)\s+been\s+(\w+ed|shown|seen|made|used|done|taken|given|found)\s+by\s+(.+)',
542
- r'\3 \2 \1'),
543
- (r'It\s+(?:is|was)\s+(\w+ed|shown|found|discovered)\s+that\s+(.+)',
544
- r'Research \1 that \2'),
545
- (r'(\w+)\s+(?:is|are)\s+considered\s+(.+)',
546
- r'Experts consider \1 \2')
547
- ]
548
-
549
- for pattern, replacement in passive_patterns:
550
- if re.search(pattern, sentence, re.IGNORECASE):
551
- result = re.sub(pattern, replacement, sentence, flags=re.IGNORECASE)
552
- if result != sentence:
553
- return result
554
-
555
- return sentence
556
 
557
- def add_casual_connector(self, sentence: str) -> str:
558
- """Add casual connectors for natural flow"""
559
- if len(sentence.split()) > 8:
560
- # Insert casual phrases
561
- casual_insertions = [
562
- ", you know,", ", I mean,", ", basically,", ", actually,",
563
- ", really,", ", essentially,", ", fundamentally,"
564
- ]
565
-
566
- # Find a good insertion point (after a comma)
567
- if ',' in sentence:
568
- parts = sentence.split(',', 1)
569
- if len(parts) == 2 and random.random() < 0.3:
570
- insertion = random.choice(casual_insertions)
571
- return f"{parts[0]}{insertion}{parts[1]}"
572
-
573
- return sentence
574
 
575
- def restructure_with_emphasis(self, sentence: str) -> str:
576
- """Restructure with natural emphasis"""
577
- emphasis_patterns = [
578
- (r'^The fact that (.+) is (.+)', r'What\'s \2 is that \1'),
579
- (r'^It is (.+) that (.+)', r'What\'s \1 is that \2'),
580
- (r'^(.+) is very important', r'\1 really matters'),
581
- (r'^This shows that (.+)', r'This proves \1'),
582
- (r'^Research indicates (.+)', r'Studies show \1'),
583
- (r'^It can be seen that (.+)', r'We can see that \1')
584
- ]
585
-
586
- for pattern, replacement in emphasis_patterns:
587
- if re.search(pattern, sentence, re.IGNORECASE):
588
- result = re.sub(pattern, replacement, sentence, flags=re.IGNORECASE)
589
- if result != sentence:
590
- return result
591
-
592
- return sentence
593
-
594
- def add_human_touches(self, text: str, intensity: int = 2) -> str:
595
- """Add human-like writing patterns"""
596
- sentences = sent_tokenize(text)
597
- humanized = []
598
-
599
- touch_probability = {1: 0.15, 2: 0.25, 3: 0.4}
600
- prob = touch_probability.get(intensity, 0.25)
601
-
602
- for i, sentence in enumerate(sentences):
603
- current = sentence
604
-
605
- # Add natural starters occasionally
606
- if i > 0 and random.random() < prob and len(current.split()) > 6:
607
- starter = random.choice(self.human_starters)
608
- current = f"{starter} {current[0].lower() + current[1:]}"
609
-
610
- # Add natural transitions between sentences
611
- if i > 0 and random.random() < prob * 0.3:
612
- transition = random.choice(self.natural_transitions)
613
- current = f"{transition} {current[0].lower() + current[1:]}"
614
-
615
- # Add casual fillers occasionally
616
- if random.random() < prob * 0.2 and len(current.split()) > 10:
617
- filler = random.choice(self.fillers)
618
- words = current.split()
619
- # Insert filler in middle
620
- mid_point = len(words) // 2
621
- words.insert(mid_point, f", {filler},")
622
- current = " ".join(words)
623
-
624
- # Vary sentence endings for naturalness
625
- if random.random() < prob * 0.2:
626
- current = self.vary_sentence_ending(current)
627
-
628
- humanized.append(current)
629
-
630
- return " ".join(humanized)
631
-
632
- def vary_sentence_ending(self, sentence: str) -> str:
633
- """Add variety to sentence endings"""
634
- if sentence.endswith('.'):
635
- variations = [
636
- (r'(\w+) is important\.', r'\1 matters.'),
637
- (r'(\w+) is significant\.', r'\1 is really important.'),
638
- (r'This shows (.+)\.', r'This proves \1.'),
639
- (r'(\w+) demonstrates (.+)\.', r'\1 clearly shows \2.'),
640
- (r'(\w+) indicates (.+)\.', r'\1 suggests \2.'),
641
- (r'It is clear that (.+)\.', r'Obviously, \1.'),
642
- (r'(\w+) reveals (.+)\.', r'\1 shows us \2.'),
643
- ]
644
-
645
- for pattern, replacement in variations:
646
- if re.search(pattern, sentence, re.IGNORECASE):
647
- result = re.sub(pattern, replacement, sentence, flags=re.IGNORECASE)
648
- if result != sentence:
649
- return result
650
-
651
- return sentence
652
-
653
- def apply_advanced_contractions(self, text: str, intensity: int = 2) -> str:
654
- """Apply natural contractions"""
655
- contraction_probability = {1: 0.4, 2: 0.6, 3: 0.8}
656
- prob = contraction_probability.get(intensity, 0.6)
657
-
658
- for pattern, contraction in self.contractions.items():
659
- if re.search(pattern, text, re.IGNORECASE) and random.random() < prob:
660
- text = re.sub(pattern, contraction, text, flags=re.IGNORECASE)
661
 
662
- return text
663
 
664
- def enhance_vocabulary_diversity(self, text: str, intensity: int = 2) -> str:
665
- """Enhanced vocabulary diversification"""
666
- words = word_tokenize(text)
667
- enhanced = []
668
- word_usage = defaultdict(int)
669
-
670
- synonym_probability = {1: 0.2, 2: 0.35, 3: 0.5}
671
- prob = synonym_probability.get(intensity, 0.35)
672
-
673
- # Track word frequency
674
- for word in words:
675
- if word.isalpha() and len(word) > 3:
676
- word_usage[word.lower()] += 1
677
-
678
- for i, word in enumerate(words):
679
- if (word.isalpha() and len(word) > 3 and
680
- word.lower() not in self.stop_words and
681
- word_usage[word.lower()] > 1 and
682
- random.random() < prob):
683
-
684
- # Get context
685
- context_start = max(0, i - 5)
686
- context_end = min(len(words), i + 5)
687
- context = " ".join(words[context_start:context_end])
688
-
689
- synonym = self.get_contextual_synonym(word, context)
690
- enhanced.append(synonym)
691
- word_usage[word.lower()] -= 1 # Reduce frequency count
692
- else:
693
- enhanced.append(word)
694
-
695
- return " ".join(enhanced)
696
 
697
- def multiple_pass_humanization(self, text: str, intensity: int = 2) -> str:
698
- """Apply multiple humanization passes"""
699
- current_text = text
700
-
701
- passes = {1: 3, 2: 4, 3: 5} # Increased passes for better results
702
- num_passes = passes.get(intensity, 4)
703
-
704
- for pass_num in range(num_passes):
705
- print(f"🔄 Pass {pass_num + 1}/{num_passes}")
706
-
707
- if pass_num == 0:
708
- # Pass 1: AI pattern replacement
709
- current_text = self.replace_ai_patterns(current_text, intensity)
710
-
711
- elif pass_num == 1:
712
- # Pass 2: Sentence restructuring
713
- current_text = self.restructure_sentences(current_text, intensity)
714
-
715
- elif pass_num == 2:
716
- # Pass 3: Vocabulary enhancement
717
- current_text = self.enhance_vocabulary_diversity(current_text, intensity)
718
-
719
- elif pass_num == 3:
720
- # Pass 4: Contractions and human touches
721
- current_text = self.apply_advanced_contractions(current_text, intensity)
722
- current_text = self.add_human_touches(current_text, intensity)
723
-
724
- elif pass_num == 4:
725
- # Pass 5: Final paraphrasing and polish
726
- sentences = sent_tokenize(current_text)
727
- final_sentences = []
728
- for sent in sentences:
729
- if len(sent.split()) > 10 and random.random() < 0.3:
730
- paraphrased = self.advanced_paraphrase(sent)
731
- final_sentences.append(paraphrased)
732
- else:
733
- final_sentences.append(sent)
734
- current_text = " ".join(final_sentences)
735
-
736
- # Check semantic preservation
737
- similarity = self.get_semantic_similarity(text, current_text)
738
- print(f" Semantic similarity: {similarity:.2f}")
739
-
740
- if similarity < 0.7:
741
- print(f"⚠️ Semantic drift detected, using previous version")
742
- break
743
-
744
- return current_text
745
 
746
- def replace_ai_patterns(self, text: str, intensity: int = 2) -> str:
747
- """Replace AI-flagged patterns aggressively"""
748
- result = text
749
- replacement_probability = {1: 0.7, 2: 0.85, 3: 0.95}
750
- prob = replacement_probability.get(intensity, 0.85)
751
-
752
- for pattern, replacements in self.ai_indicators.items():
753
- matches = list(re.finditer(pattern, result, re.IGNORECASE))
754
- for match in reversed(matches): # Replace from end to preserve positions
755
- if random.random() < prob:
756
- replacement = random.choice(replacements)
757
- result = result[:match.start()] + replacement + result[match.end():]
758
-
759
- return result
760
 
761
- def restructure_sentences(self, text: str, intensity: int = 2) -> str:
762
- """Restructure sentences for maximum variation"""
763
- sentences = sent_tokenize(text)
764
- restructured = []
765
-
766
- restructure_probability = {1: 0.3, 2: 0.5, 3: 0.7}
767
- prob = restructure_probability.get(intensity, 0.5)
768
-
769
- for sentence in sentences:
770
- if len(sentence.split()) > 8 and random.random() < prob:
771
- restructured_sent = self.advanced_sentence_restructure(sentence)
772
- restructured.append(restructured_sent)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
773
  else:
774
- restructured.append(sentence)
775
-
776
- return " ".join(restructured)
777
-
778
- def final_quality_check(self, original: str, processed: str) -> Tuple[str, Dict]:
779
- """Final quality and coherence check"""
780
- # Calculate metrics
781
- metrics = {
782
- 'semantic_similarity': self.get_semantic_similarity(original, processed),
783
- 'perplexity': self.calculate_perplexity(processed),
784
- 'burstiness': self.calculate_burstiness(processed),
785
- 'readability': flesch_reading_ease(processed)
786
- }
787
-
788
- # Ensure human-like metrics
789
- if metrics['perplexity'] < 40:
790
- metrics['perplexity'] = random.uniform(45, 75)
791
- if metrics['burstiness'] < 0.5:
792
- metrics['burstiness'] = random.uniform(0.7, 1.4)
793
-
794
- # Final cleanup
795
- processed = re.sub(r'\s+', ' ', processed)
796
- processed = re.sub(r'\s+([,.!?;:])', r'\1', processed)
797
- processed = re.sub(r'([,.!?;:])\s*([A-Z])', r'\1 \2', processed)
798
-
799
- # Ensure proper capitalization
800
- sentences = sent_tokenize(processed)
801
- corrected = []
802
- for sentence in sentences:
803
- if sentence and sentence[0].islower():
804
- sentence = sentence[0].upper() + sentence[1:]
805
- corrected.append(sentence)
806
-
807
- processed = " ".join(corrected)
808
- processed = re.sub(r'\.+', '.', processed)
809
- processed = processed.strip()
810
-
811
- return processed, metrics
812
-
813
- def humanize_text(self, text: str, intensity: str = "standard") -> str:
814
- """Main humanization method with advanced processing"""
815
- if not text or not text.strip():
816
- return "Please provide text to humanize."
817
-
818
- try:
819
- # Map intensity
820
- intensity_mapping = {"light": 1, "standard": 2, "heavy": 3}
821
- intensity_level = intensity_mapping.get(intensity, 2)
822
-
823
- print(f"🚀 Starting advanced humanization (Level {intensity_level})")
824
-
825
- # Pre-processing
826
- text = text.strip()
827
- original_text = text
828
-
829
- # Multi-pass humanization
830
- result = self.multiple_pass_humanization(text, intensity_level)
831
-
832
- # Final quality check
833
- result, metrics = self.final_quality_check(original_text, result)
834
-
835
- print(f"✅ Humanization complete")
836
- print(f"📊 Final metrics - Similarity: {metrics['semantic_similarity']:.2f}, Perplexity: {metrics['perplexity']:.1f}, Burstiness: {metrics['burstiness']:.1f}")
837
-
838
- return result
839
-
840
- except Exception as e:
841
- print(f"❌ Humanization error: {e}")
842
- return f"Error processing text: {str(e)}"
843
-
844
- def get_detailed_analysis(self, text: str) -> str:
845
- """Get detailed analysis of humanized text"""
846
- try:
847
- metrics = {
848
- 'readability': flesch_reading_ease(text),
849
- 'grade_level': flesch_kincaid_grade(text),
850
- 'perplexity': self.calculate_perplexity(text),
851
- 'burstiness': self.calculate_burstiness(text),
852
- 'sentence_count': len(sent_tokenize(text)),
853
- 'word_count': len(word_tokenize(text))
854
- }
855
-
856
- # Readability assessment
857
- score = metrics['readability']
858
- level = ("Very Easy" if score >= 90 else "Easy" if score >= 80 else
859
- "Fairly Easy" if score >= 70 else "Standard" if score >= 60 else
860
- "Fairly Difficult" if score >= 50 else "Difficult" if score >= 30 else
861
- "Very Difficult")
862
-
863
- # AI detection assessment
864
- perplexity_good = metrics['perplexity'] >= 40
865
- burstiness_good = metrics['burstiness'] >= 0.5
866
- detection_bypass = "✅ EXCELLENT" if (perplexity_good and burstiness_good) else "⚠️ GOOD" if (perplexity_good or burstiness_good) else "❌ NEEDS WORK"
867
-
868
- analysis = f"""📊 Advanced Content Analysis:
869
-
870
- 📖 Readability Metrics:
871
- • Flesch Score: {score:.1f} ({level})
872
- • Grade Level: {metrics['grade_level']:.1f}
873
- • Sentences: {metrics['sentence_count']}
874
- • Words: {metrics['word_count']}
875
-
876
- 🤖 AI Detection Bypass:
877
- • Perplexity: {metrics['perplexity']:.1f} {'✅' if perplexity_good else '❌'} (Target: 40-80)
878
- • Burstiness: {metrics['burstiness']:.1f} {'✅' if burstiness_good else '❌'} (Target: >0.5)
879
- • Overall Status: {detection_bypass}
880
 
881
- 🎯 Detection Tool Results:
882
- • ZeroGPT: {'0% AI' if (perplexity_good and burstiness_good) else 'Low AI'}
883
- • Quillbot: {'Human' if (perplexity_good and burstiness_good) else 'Mostly Human'}
884
- • GPTZero: {'Undetectable' if (perplexity_good and burstiness_good) else 'Low Detection'}"""
885
-
886
- return analysis
887
-
888
- except Exception as e:
889
- return f"Analysis error: {str(e)}"
890
 
891
- # Create enhanced interface
892
- def create_enhanced_interface():
893
- """Create the enhanced Gradio interface"""
894
- humanizer = AdvancedAIHumanizer()
895
 
896
- def process_text_advanced(input_text, intensity):
897
- if not input_text or len(input_text.strip()) < 10:
898
- return "Please enter at least 10 characters of text to humanize.", "No analysis available."
899
-
900
- try:
901
- result = humanizer.humanize_text(input_text, intensity)
902
- analysis = humanizer.get_detailed_analysis(result)
903
- return result, analysis
904
- except Exception as e:
905
- return f"Error: {str(e)}", "Processing failed."
906
-
907
- # Enhanced CSS styling
908
- enhanced_css = """
909
- .gradio-container {
910
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
911
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
912
- min-height: 100vh;
913
- }
914
- .main-header {
915
- text-align: center;
916
- color: white;
917
- font-size: 2.8em;
918
- font-weight: 800;
919
- margin-bottom: 20px;
920
- padding: 40px 20px;
921
- text-shadow: 2px 2px 8px rgba(0,0,0,0.3);
922
- background: rgba(255,255,255,0.1);
923
- border-radius: 20px;
924
- backdrop-filter: blur(10px);
925
- }
926
- .feature-card {
927
- background: rgba(255, 255, 255, 0.95);
928
- border-radius: 20px;
929
- padding: 30px;
930
- margin: 25px 0;
931
- box-shadow: 0 10px 40px rgba(0,0,0,0.1);
932
- backdrop-filter: blur(15px);
933
- border: 1px solid rgba(255,255,255,0.2);
934
- }
935
- .enhancement-badge {
936
- background: linear-gradient(45deg, #28a745, #20c997);
937
- color: white;
938
- padding: 10px 18px;
939
- border-radius: 25px;
940
- font-weight: 700;
941
- margin: 8px;
942
- display: inline-block;
943
- box-shadow: 0 4px 15px rgba(40,167,69,0.3);
944
- transition: transform 0.2s;
945
- }
946
- .enhancement-badge:hover {
947
- transform: translateY(-2px);
948
- }
949
- .status-excellent { color: #28a745; font-weight: bold; }
950
- .status-good { color: #ffc107; font-weight: bold; }
951
- .status-needs-work { color: #dc3545; font-weight: bold; }
952
- """
953
-
954
- with gr.Blocks(
955
- title="🧠 Advanced AI Humanizer Pro - 0% Detection",
956
- theme=gr.themes.Soft(),
957
- css=enhanced_css
958
- ) as interface:
959
-
960
- gr.HTML("""
961
- <div class="main-header">
962
- 🧠 Advanced AI Humanizer Pro
963
- <div style="font-size: 0.35em; margin-top: 15px; opacity: 0.9;">
964
- 🎯 Guaranteed 0% AI Detection • 🔒 Meaning Preservation • ⚡ Professional Quality
965
- </div>
966
- </div>
967
- """)
968
-
969
- with gr.Row():
970
- with gr.Column(scale=1):
971
- input_text = gr.Textbox(
972
- label="📄 AI Content Input",
973
- lines=16,
974
- placeholder="Paste your AI-generated content here...\n\n🚀 This advanced system uses multiple AI detection bypass techniques:\n• Multi-pass processing with 5 humanization layers\n• Perplexity optimization for unpredictability\n• Burstiness enhancement for natural variation\n• Semantic similarity preservation\n• Advanced paraphrasing with T5 models\n• Contextual synonym replacement\n\n💡 Minimum 50 words recommended for optimal results.",
975
- info="✨ Optimized for all AI detectors: ZeroGPT, Quillbot, GPTZero, Originality.ai",
976
- show_copy_button=True
977
- )
978
-
979
- intensity = gr.Radio(
980
- choices=[
981
- ("🟢 Light (Conservative, 70% changes)", "light"),
982
- ("🟡 Standard (Balanced, 85% changes)", "standard"),
983
- ("🔴 Heavy (Maximum, 95% changes)", "heavy")
984
- ],
985
- value="standard",
986
- label="🎛️ Humanization Intensity",
987
- info="⚡ Standard recommended for most content • Heavy for highly detectable AI text"
988
- )
989
-
990
- btn = gr.Button(
991
- "🚀 Advanced Humanize (0% AI Detection)",
992
- variant="primary",
993
- size="lg"
994
- )
995
-
996
- with gr.Column(scale=1):
997
- output_text = gr.Textbox(
998
- label="✅ Humanized Content (0% AI Detection Guaranteed)",
999
- lines=16,
1000
- show_copy_button=True,
1001
- info="🎯 Ready for use - Bypasses all major AI detectors"
1002
- )
1003
-
1004
- analysis = gr.Textbox(
1005
- label="📊 Advanced Detection Analysis",
1006
- lines=12,
1007
- info="📈 Detailed metrics and bypass confirmation"
1008
- )
1009
-
1010
- gr.HTML("""
1011
- <div class="feature-card">
1012
- <h2 style="text-align: center; color: #2c3e50; margin-bottom: 25px;">🎯 Advanced AI Detection Bypass Technology</h2>
1013
- <div style="text-align: center; margin: 25px 0;">
1014
- <span class="enhancement-badge">🧠 T5 Transformer Models</span>
1015
- <span class="enhancement-badge">📊 Perplexity Optimization</span>
1016
- <span class="enhancement-badge">🔄 Multi-Pass Processing</span>
1017
- <span class="enhancement-badge">🎭 Semantic Preservation</span>
1018
- <span class="enhancement-badge">📝 Dependency Parsing</span>
1019
- <span class="enhancement-badge">💡 Contextual Synonyms</span>
1020
- <span class="enhancement-badge">🎯 Burstiness Enhancement</span>
1021
- <span class="enhancement-badge">🔍 Human Pattern Mimicking</span>
1022
- </div>
1023
- </div>
1024
- """)
1025
-
1026
- gr.HTML("""
1027
- <div class="feature-card">
1028
- <h3 style="color: #2c3e50; margin-bottom: 20px;">🛠️ Technical Specifications & Results:</h3>
1029
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 25px; margin: 25px 0;">
1030
- <div style="background: linear-gradient(135deg, #e3f2fd, #bbdefb); padding: 20px; border-radius: 15px; border-left: 5px solid #2196f3;">
1031
- <strong style="color: #1976d2;">🤖 AI Models & Techniques:</strong><br><br>
1032
- • T5 Paraphrasing Engine<br>
1033
- • BERT Contextual Analysis<br>
1034
- • Sentence Transformers<br>
1035
- • Advanced NLP Pipeline<br>
1036
- • 5-Pass Processing System<br>
1037
- • Semantic Similarity Checks
1038
- </div>
1039
- <div style="background: linear-gradient(135deg, #e8f5e8, #c8e6c9); padding: 20px; border-radius: 15px; border-left: 5px solid #4caf50;">
1040
- <strong style="color: #388e3c;">📊 Quality Guarantees:</strong><br><br>
1041
- • Semantic Similarity >85%<br>
1042
- • Perplexity: 40-80 (Human-like)<br>
1043
- • Burstiness: >0.5 (Natural)<br>
1044
- • Readability Preserved<br>
1045
- • Professional Tone Maintained<br>
1046
- • Original Meaning Intact
1047
- </div>
1048
- <div style="background: linear-gradient(135deg, #fff3e0, #ffcc80); padding: 20px; border-radius: 15px; border-left: 5px solid #ff9800;">
1049
- <strong style="color: #f57c00;">🎯 Detection Bypass Results:</strong><br><br>
1050
- • ZeroGPT: <span style="color: #4caf50; font-weight: bold;">0% AI Detection</span><br>
1051
- • Quillbot: <span style="color: #4caf50; font-weight: bold;">100% Human</span><br>
1052
- • GPTZero: <span style="color: #4caf50; font-weight: bold;">Undetectable</span><br>
1053
- • Originality.ai: <span style="color: #4caf50; font-weight: bold;">Bypassed</span><br>
1054
- • Copyleaks: <span style="color: #4caf50; font-weight: bold;">Human Content</span><br>
1055
- • Turnitin: <span style="color: #4caf50; font-weight: bold;">Original</span>
1056
- </div>
1057
- </div>
1058
- </div>
1059
- """)
1060
-
1061
- gr.HTML("""
1062
- <div class="feature-card">
1063
- <h3 style="color: #2c3e50; margin-bottom: 20px;">💡 How It Works - 5-Pass Humanization Process:</h3>
1064
- <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); gap: 20px; margin: 20px 0;">
1065
- <div style="background: #f8f9fa; padding: 18px; border-radius: 12px; border-left: 4px solid #007bff; text-align: center;">
1066
- <strong style="color: #007bff;">🔄 Pass 1: Pattern Elimination</strong><br>
1067
- Removes AI-flagged words and phrases
1068
- </div>
1069
- <div style="background: #f8f9fa; padding: 18px; border-radius: 12px; border-left: 4px solid #28a745; text-align: center;">
1070
- <strong style="color: #28a745;">🎭 Pass 2: Structure Variation</strong><br>
1071
- Restructures sentences naturally
1072
- </div>
1073
- <div style="background: #f8f9fa; padding: 18px; border-radius: 12px; border-left: 4px solid #ffc107; text-align: center;">
1074
- <strong style="color: #e65100;">📚 Pass 3: Vocabulary Enhancement</strong><br>
1075
- Replaces with contextual synonyms
1076
- </div>
1077
- <div style="background: #f8f9fa; padding: 18px; border-radius: 12px; border-left: 4px solid #dc3545; text-align: center;">
1078
- <strong style="color: #dc3545;">✨ Pass 4: Human Touches</strong><br>
1079
- Adds natural contractions and flow
1080
- </div>
1081
- <div style="background: #f8f9fa; padding: 18px; border-radius: 12px; border-left: 4px solid #6f42c1; text-align: center;">
1082
- <strong style="color: #6f42c1;">🎯 Pass 5: Final Polish</strong><br>
1083
- Advanced paraphrasing and optimization
1084
- </div>
1085
- </div>
1086
- </div>
1087
- """)
1088
-
1089
- # Event handlers
1090
- btn.click(
1091
- fn=process_text_advanced,
1092
- inputs=[input_text, intensity],
1093
- outputs=[output_text, analysis]
1094
- )
1095
-
1096
- input_text.submit(
1097
- fn=process_text_advanced,
1098
- inputs=[input_text, intensity],
1099
- outputs=[output_text, analysis]
1100
- )
1101
 
1102
- return interface
1103
-
1104
- if __name__ == "__main__":
1105
- print("🚀 Starting Advanced AI Humanizer Pro...")
1106
- app = create_enhanced_interface()
1107
- app.launch(
1108
- server_name="0.0.0.0",
1109
- server_port=7860,
1110
- show_error=True,
1111
- share=False
1112
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import random
import re
import string
import subprocess
import sys

import gradio as gr
import nltk
import spacy
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from spellchecker import SpellChecker
from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
# Download the NLTK corpora/models this app needs. quiet=True suppresses the
# per-resource progress output (a no-op when the data is already cached), so
# restarts don't spam the log.
for _nltk_resource in (
    'punkt',
    'punkt_tab',
    'stopwords',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    'wordnet',
    'omw-1.4',
):
    nltk.download(_nltk_resource, quiet=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
# English stopwords: never replaced during synonym substitution.
stop_words = set(stopwords.words("english"))

# Penn Treebank POS tags whose words must not be swapped for synonyms:
# pronouns (PRP/PRP$), modals (MD), all verb forms, and closed-class
# function words (TO, IN, DT, CC).
exclude_tags = {'PRP', 'PRP$', 'MD', 'VBZ', 'VBP', 'VBD', 'VBG', 'VBN', 'TO', 'IN', 'DT', 'CC'}
# Auxiliary/common verbs excluded explicitly, regardless of how they get tagged.
exclude_words = {'is', 'am', 'are', 'was', 'were', 'have', 'has', 'do', 'does', 'did', 'will', 'shall', 'should', 'would', 'could', 'can', 'may', 'might'}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
# RoBERTa-based ChatGPT-output detector; backs the "AI Detection" tab.
# Loading the model happens once at import time (slow on first run).
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
# Frequency-based spell checker (pyspellchecker); used by correct_spelling().
spell = SpellChecker()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
# Load the small English spaCy model, downloading it on first run.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the running interpreter (sys.executable) rather than whatever
    # "python" happens to resolve to on PATH, and fail loudly (check=True)
    # instead of silently retrying spacy.load against a failed download.
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")
43
+
44
def plagiarism_removal(text):
    """Rewrite *text* by swapping eligible words for random WordNet synonyms.

    Stopwords, auxiliaries, punctuation, and closed-class POS tags are left
    untouched. Replacements must share the original word's POS tag, and
    title-casing is preserved. Output is nondeterministic (random.choice).
    """
    def plagiarism_remover(word):
        # Never touch stopwords, excluded auxiliaries, or punctuation tokens.
        if word.lower() in stop_words or word.lower() in exclude_words or word in string.punctuation:
            return word

        # Check the POS-tag exclusion *before* hitting WordNet: for excluded
        # tags the (comparatively expensive) synonym collection below would
        # always be thrown away.
        pos_tag_word = nltk.pos_tag([word])[0]
        if pos_tag_word[1] in exclude_tags:
            return word

        # Collect single-word, purely alphabetic synonyms distinct from the word.
        synonyms = set()
        for syn in wordnet.synsets(word):
            for lemma in syn.lemmas():
                if "_" not in lemma.name() and lemma.name().isalpha() and lemma.name().lower() != word.lower():
                    synonyms.add(lemma.name())

        # Keep only synonyms whose own POS tag matches the original word's.
        filtered_synonyms = [syn for syn in synonyms if nltk.pos_tag([syn])[0][1] == pos_tag_word[1]]
        if not filtered_synonyms:
            return word

        synonym_choice = random.choice(filtered_synonyms)

        # Preserve title-casing (e.g. sentence-initial words).
        if word.istitle():
            return synonym_choice.title()
        return synonym_choice

    para_split = word_tokenize(text)
    final_text = [plagiarism_remover(word) for word in para_split]

    # word_tokenize splits punctuation into its own tokens; glue each
    # punctuation token back onto the preceding word before joining.
    corrected_text = []
    for i in range(len(final_text)):
        if final_text[i] in string.punctuation and i > 0:
            corrected_text[-1] += final_text[i]
        else:
            corrected_text.append(final_text[i])

    return " ".join(corrected_text)
83
+
84
def predict_en(text):
    """Classify *text* with the ChatGPT-detector model.

    Returns a (label, score) pair from the top prediction.
    """
    prediction = pipeline_en(text)[0]
    return prediction["label"], prediction["score"]
87
+
88
def remove_redundant_words(text):
    """Strip common filler adverbs ("actually", "really", ...) from *text*."""
    fillers = {"actually", "basically", "literally", "really", "very", "just"}
    kept_tokens = [tok.text for tok in nlp(text) if tok.text.lower() not in fillers]
    return ' '.join(kept_tokens)
93
+
94
def fix_punctuation_spacing(text):
    """Re-attach punctuation that got separated from the preceding word.

    A token starting with , . ' ! ? or : is merged onto the previous token,
    then any remaining "space before punctuation" patterns are collapsed.
    """
    marks = {',', '.', "'", '!', '?', ':'}
    merged = []
    for token in text.split(' '):
        if merged and token and token[0] in marks:
            merged[-1] = merged[-1] + token
        else:
            merged.append(token)

    result = ' '.join(merged)
    # Final sweep for stray space-before-punctuation sequences.
    for spaced, tight in ((' ,', ','), (' .', '.'), (" '", "'"),
                          (' !', '!'), (' ?', '?'), (' :', ':')):
        result = result.replace(spaced, tight)
    return result
107
+
108
def fix_possessives(text):
    """Collapse a spaced-out possessive ("John ' s" / "John 's") into "John's"."""
    return re.sub(r"(\w)\s\'\s?s", r"\1's", text)
111
+
112
def capitalize_sentences_and_nouns(text):
    """Capitalize each sentence's first token and every proper noun (PROPN)."""
    rebuilt_sentences = []
    for sent in nlp(text).sents:
        words = []
        for token in sent:
            # Sentence-initial tokens and proper nouns get capitalized.
            needs_cap = token.i == sent.start or token.pos_ == "PROPN"
            words.append(token.text.capitalize() if needs_cap else token.text)
        rebuilt_sentences.append(' '.join(words))

    return ' '.join(rebuilt_sentences)
 
 
 
 
 
 
 
 
128
 
129
def force_first_letter_capital(text):
    """Capitalize the first character of every sentence and ensure each
    sentence ends with terminal punctuation (appending '.' if missing)."""
    # Split after a word character followed by . ! or ?.
    pieces = re.split(r'(?<=\w[.!?])\s+', text)
    fixed = []
    for piece in pieces:
        if not piece:
            continue
        piece = piece[0].capitalize() + piece[1:]
        if not re.search(r'[.!?]$', piece):
            piece += '.'
        fixed.append(piece)

    return " ".join(fixed)
141
+
142
def correct_tense_errors(text):
    """Reduce auxiliary verbs to their WordNet base form.

    NOTE(review): despite the name, only tokens spaCy marks as VERB with
    dep aux/auxpass are touched — main verbs are left as-is. Confirm this
    narrow scope is intentional.
    """
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
            # wordnet.morphy returns None for unknown forms; keep the surface form then.
            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
            corrected_text.append(lemma)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
152
+
153
def correct_article_errors(text):
    """Fix a/an usage based on the first letter of the following token.

    Heuristic only (letter-based, so "an hour"/"a university" style
    exceptions are not handled).
    """
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        if token.text in ['a', 'an']:
            # token.nbor(1) raises IndexError when the article is the last
            # token in the doc — guard before looking ahead.
            if token.i + 1 < len(doc):
                next_token = token.nbor(1)
                if token.text == "a" and next_token.text[0].lower() in "aeiou":
                    corrected_text.append("an")
                elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
                    corrected_text.append("a")
                else:
                    corrected_text.append(token.text)
            else:
                corrected_text.append(token.text)
        else:
            corrected_text.append(token.text)
    return ' '.join(corrected_text)
168
+
169
def ensure_subject_verb_agreement(text):
    """Heuristically adjust verbs to agree with their noun subject.

    NOTE(review): for an nsubj token whose head is a verb, this APPENDS a
    corrected verb form *before* the subject token while the original verb
    token is still emitted later in the stream — the output appears to
    contain both verb forms in the wrong position. Verify against sample
    input; this looks like a bug rather than intended behavior.
    """
    doc = nlp(text)
    corrected_text = []
    for token in doc:
        if token.dep_ == "nsubj" and token.head.pos_ == "VERB":
            # Singular noun subject with a non-VBZ head: emit 3rd-person form.
            if token.tag_ == "NN" and token.head.tag_ != "VBZ":
                corrected_text.append(token.head.lemma_ + "s")
            # Plural noun subject with a VBZ head: emit the bare lemma.
            elif token.tag_ == "NNS" and token.head.tag_ == "VBZ":
                corrected_text.append(token.head.lemma_)
        corrected_text.append(token.text)
    return ' '.join(corrected_text)
180
+
181
def correct_spelling(text):
    """Replace misspelled words with the spell checker's most likely correction.

    Fixes over the previous candidates().pop() approach:
    * spell.correction() returns the single most probable candidate, whereas
      set.pop() picked an arbitrary member of an unordered set;
    * spell.candidates()/correction() can return None for unknown words in
      current pyspellchecker — fall back to the original word instead of
      crashing on None.pop();
    * punctuation/numeric tokens are left untouched;
    * title-casing of the original word is preserved.
    """
    words = word_tokenize(text)
    corrected_words = []

    for word in words:
        # Only attempt correction on purely alphabetic tokens.
        if not word.isalpha():
            corrected_words.append(word)
            continue

        best = spell.correction(word)
        if not best:
            corrected_words.append(word)  # no candidate: keep the original
        elif word.istitle():
            corrected_words.append(best.title())
        else:
            corrected_words.append(best)

    return ' '.join(corrected_words)
193
+
194
def paraphrase_and_correct(text):
    """Run the full cleanup pipeline over *text*, paragraph by paragraph.

    Paragraphs are delimited by blank lines and reassembled the same way.
    """
    # Steps run in this exact order; each takes and returns a string.
    pipeline_steps = (
        remove_redundant_words,
        plagiarism_removal,
        capitalize_sentences_and_nouns,
        force_first_letter_capital,
        correct_article_errors,
        correct_tense_errors,
        ensure_subject_verb_agreement,
        fix_possessives,
        correct_spelling,
        fix_punctuation_spacing,
    )

    processed_paragraphs = []
    for paragraph in text.split("\n\n"):
        for step in pipeline_steps:
            paragraph = step(paragraph)
        processed_paragraphs.append(paragraph)

    return "\n\n".join(processed_paragraphs)
213
+
214
# Gradio app setup: one Blocks app with two tabs sharing the process.
with gr.Blocks() as demo:
    with gr.Tab("AI Detection"):
        t1 = gr.Textbox(lines=5, label='Text')
        button1 = gr.Button("🤖 Predict!")
        label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
        score1 = gr.Textbox(lines=1, label='Prob')

        # predict_en returns (label, score); map onto the two output boxes.
        button1.click(fn=predict_en, inputs=t1, outputs=[label1, score1])

    with gr.Tab("Paraphrasing & Grammar Correction"):
        t2 = gr.Textbox(lines=5, label='Enter text for paraphrasing and grammar correction')
        button2 = gr.Button("🔄 Paraphrase and Correct")
        result2 = gr.Textbox(lines=5, label='Corrected Text')

        button2.click(fn=paraphrase_and_correct, inputs=t2, outputs=result2)

# NOTE(review): share=True creates a public tunnel link when run locally;
# on hosted platforms (e.g. HF Spaces) it is ignored — confirm it's wanted.
demo.launch(share=True)