Jay-Rajput committed on
Commit
59f5880
Β·
1 Parent(s): 1fc4ee7

ai detector new

Browse files
Files changed (1) hide show
  1. app.py +460 -357
app.py CHANGED
@@ -1,8 +1,15 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification, GPT2LMHeadModel, GPT2TokenizerFast
 
 
 
 
 
 
 
 
4
  import numpy as np
5
- from scipy import stats
6
  import re
7
  from collections import Counter
8
  import math
@@ -11,466 +18,562 @@ warnings.filterwarnings('ignore')
11
 
12
  class AdvancedAITextDetector:
13
def __init__(self):
    """Set up the detector and eagerly load every backing model.

    ``self.models_loaded`` maps a model key ('roberta', 'alt', 'gpt2')
    to a bool recording whether that model is available.
    """
    # Availability flags are filled in by load_models().
    self.models_loaded = {}
    self.load_models()
 
19
 
20
def load_models(self):
    """Load the detection models, recording availability per model.

    Each model is optional: a failed load is logged and flagged False in
    ``self.models_loaded`` so callers can skip it.
    """
    try:
        # Model 1: RoBERTa-based detector (most accurate for ChatGPT text)
        self.roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base-openai-detector")
        self.roberta_model = AutoModelForSequenceClassification.from_pretrained("roberta-base-openai-detector")
        self.roberta_model.eval()
        self.models_loaded['roberta'] = True
    except Exception:  # narrowed from bare `except:` so Ctrl-C/SystemExit still propagate
        print("Warning: Could not load RoBERTa detector")
        self.models_loaded['roberta'] = False

    try:
        # Model 2: Alternative ChatGPT detector
        self.alt_tokenizer = AutoTokenizer.from_pretrained("Hello-SimpleAI/chatgpt-detector-roberta")
        self.alt_model = AutoModelForSequenceClassification.from_pretrained("Hello-SimpleAI/chatgpt-detector-roberta")
        self.alt_model.eval()
        self.models_loaded['alt'] = True
    except Exception:
        print("Warning: Could not load alternative detector")
        self.models_loaded['alt'] = False

    try:
        # GPT-2 language model used for perplexity scoring
        self.gpt2_tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
        self.gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2")
        self.gpt2_model.eval()
        self.models_loaded['gpt2'] = True
    except Exception:
        print("Warning: Could not load GPT-2 for perplexity")
        self.models_loaded['gpt2'] = False
 
52
def calculate_gpt2_perplexity(self, text):
    """Turn GPT-2 perplexity of ``text`` into an AI-likelihood in [0, 1].

    Lower perplexity (more predictable text) maps to higher values.
    Returns None when GPT-2 is unavailable or scoring fails.
    """
    if not self.models_loaded.get('gpt2', False):
        return None

    try:
        encodings = self.gpt2_tokenizer(text, return_tensors='pt', truncation=True, max_length=512)

        with torch.no_grad():
            outputs = self.gpt2_model(**encodings, labels=encodings.input_ids)
            loss = outputs.loss
            perplexity = torch.exp(loss).item()

        # Map perplexity onto 0-1 (typical human text: 20-60, AI text: 10-30).
        # (Removed an unused `max_length` local that the original computed.)
        normalized = 1 - min(max((perplexity - 10) / 50, 0), 1)
        return normalized
    except Exception:  # narrowed from bare `except:`
        return None
72
 
73
def detect_chatgpt_patterns(self, text):
    """Score how strongly ``text`` matches ChatGPT's habitual style.

    Combines three signals: density of stock ChatGPT phrases,
    numbered/bulleted lists, and near-equal paragraph lengths.
    Returns a value in [0, 1]; higher means more ChatGPT-like.
    """
    # Stock transition/hedging phrases that ChatGPT favours.
    signature_phrases = (
        r'\bI understand\b',
        r'\bIt\'s important to note\b',
        r'\bIt\'s worth noting\b',
        r'\bIn conclusion\b',
        r'\bHowever,\s',
        r'\bMoreover,\s',
        r'\bFurthermore,\s',
        r'\bAdditionally,\s',
        r'\bIn summary\b',
        r'\bTo summarize\b',
        r'\boverall,\s',
        r'\bGenerally speaking\b',
        r'\bTypically,\s',
        r'\bEssentially,\s',
        r'\bFundamentally,\s',
        r'\bIt\'s crucial\b',
        r'\bIt\'s essential\b',
        r'\bRemember that\b',
        r'\bKeep in mind\b',
        r'\bThis means that\b',
        r'\bThis suggests that\b',
        r'\bwhich means\b',
        r'\bthat being said\b',
        r'\bon the other hand\b',
    )

    lowered = text.lower()
    hits = sum(1 for rx in signature_phrases if re.search(rx.lower(), lowered))

    # Five distinct phrase hits saturate the phrase component.
    score = min(hits / 5, 1.0)

    # Numbered or bulleted lists are a strong structural tell.
    if re.search(r'\n\d+\.', text) or re.search(r'\n[-β€’*]\s', text):
        score = min(score + 0.2, 1.0)

    # Near-equal paragraph lengths (low coefficient of variation) add a bonus.
    paragraphs = text.split('\n\n')
    if len(paragraphs) > 2:
        word_counts = [len(p.split()) for p in paragraphs if p.strip()]
        if word_counts:
            mean_words = np.mean(word_counts)
            variation = np.std(word_counts) / mean_words if mean_words > 0 else 1
            if variation < 0.3:
                score = min(score + 0.15, 1.0)

    return score
131
-
132
def calculate_sentence_complexity_variance(self, text):
    """Measure how uniform sentence complexity is across the text.

    Returns a value in [0, 1]; values near 1 mean very uniform
    complexity (an AI tell), values near 0 mean highly varied
    complexity. Texts with fewer than two sentences return 0.5.
    """
    complexities = []
    for raw in re.split(r'[.!?]+', text):
        tokens = raw.split()
        if raw.strip() and tokens:
            # Complexity blends sentence length with average word length.
            mean_len = np.mean([len(t) for t in tokens])
            complexities.append(len(tokens) * (mean_len / 5))

    if len(complexities) < 2:
        return 0.5

    # Low coefficient of variation => uniform => AI-like (inverted scale).
    mean_c = np.mean(complexities)
    spread = np.std(complexities) / mean_c if mean_c > 0 else 0
    return 1 - min(spread / 0.5, 1.0)
152
-
153
def calculate_word_frequency_distribution(self, text):
    """Check how closely word frequencies track Zipf's law.

    Returns the |log-log correlation| of rank vs. frequency for the top
    50 words (AI text tends to fit the power law more tightly), or 0.5
    when there are fewer than 10 distinct words.
    """
    tokens = re.findall(r'\b\w+\b', text.lower())
    counts = Counter(tokens)

    if len(counts) < 10:
        return 0.5

    top_freqs = sorted(counts.values(), reverse=True)[:50]
    if len(top_freqs) <= 1:
        return 0.5

    # Perfect Zipf behaviour is a straight line in log-log space,
    # so |correlation| near 1 indicates an AI-like distribution.
    rank_axis = np.log(np.arange(1, len(top_freqs) + 1))
    freq_axis = np.log(top_freqs)
    return abs(np.corrcoef(rank_axis, freq_axis)[0, 1])
176
-
177
def detect_roberta(self, text):
    """Return P(AI-generated) from the RoBERTa OpenAI detector.

    Returns None when the model failed to load or inference raises.
    """
    if not self.models_loaded.get('roberta', False):
        return None

    try:
        inputs = self.roberta_tokenizer(text, return_tensors="pt", truncation=True,
                                        max_length=512, padding=True)

        with torch.no_grad():
            outputs = self.roberta_model(**inputs)
            predictions = torch.softmax(outputs.logits, dim=-1)

            # For this checkpoint class 0 is "Real", class 1 is "Fake" (AI).
            ai_probability = predictions[0][1].item()

        return ai_probability
    except Exception:  # narrowed from bare `except:` so Ctrl-C/SystemExit still propagate
        return None
196
 
197
def detect_alternative(self, text):
    """Return P(AI-generated) from the alternative (SimpleAI) detector.

    Returns None when the model failed to load or inference raises.
    """
    if not self.models_loaded.get('alt', False):
        return None

    try:
        inputs = self.alt_tokenizer(text, return_tensors="pt", truncation=True,
                                    max_length=512, padding=True)

        with torch.no_grad():
            outputs = self.alt_model(**inputs)
            predictions = torch.softmax(outputs.logits, dim=-1)
            # Index 1 is assumed to be the "AI" class for this checkpoint.
            ai_probability = predictions[0][1].item()

        return ai_probability
    except Exception:  # narrowed from bare `except:` so Ctrl-C/SystemExit still propagate
        return None
214
-
215
def enhanced_statistical_analysis(self, text):
    """Blend pattern, uniformity and frequency metrics into one score.

    Returns ``(score, details)``: ``score`` is a weighted combination of
    six statistical signals (higher = more AI-like) and ``details`` maps
    each signal name to its raw value.
    """
    phrase_score = self.detect_chatgpt_patterns(text)
    uniformity = self.calculate_sentence_complexity_variance(text)
    zipf_fit = self.calculate_word_frequency_distribution(text)

    tokens = text.split()
    sentence_word_counts = [
        len(s.split()) for s in re.split(r'[.!?]+', text) if s.strip()
    ]

    # How evenly sized the sentences are (AI text is more even).
    if len(sentence_word_counts) > 1:
        ratio = np.std(sentence_word_counts) / np.mean(sentence_word_counts)
        consistency = 1 - min(ratio / 0.5, 1.0)
    else:
        consistency = 0.5

    # Share of the vocabulary that gets reused (1 - fraction used once).
    if tokens:
        singletons = sum(1 for c in Counter(tokens).values() if c == 1)
        reuse = 1 - (singletons / len(tokens))
    else:
        reuse = 0.5

    # Density of formal transition words.
    connectors = {'however', 'therefore', 'moreover', 'furthermore', 'additionally',
                  'consequently', 'nevertheless', 'nonetheless', 'meanwhile', 'subsequently'}
    connector_hits = sum(1 for t in tokens if t.lower() in connectors)
    connector_density = min(connector_hits / len(tokens) * 100, 1.0) if tokens else 0

    # Weighted blend; phrase patterns are the strongest single indicator.
    blended = (phrase_score * 0.35
               + uniformity * 0.20
               + zipf_fit * 0.15
               + consistency * 0.15
               + reuse * 0.10
               + connector_density * 0.05)

    return blended, {
        'chatgpt_patterns': phrase_score,
        'sentence_uniformity': uniformity,
        'zipf_correlation': zipf_fit,
        'sentence_consistency': consistency,
        'repetition_rate': reuse,
        'transition_density': connector_density
    }
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
def detect(self, text):
    """Classify ``text`` as AI- or human-written via a weighted ensemble.

    Combines the RoBERTa OpenAI detector, an alternative RoBERTa
    detector, GPT-2 perplexity and statistical analysis. Returns a dict
    with the blended probability, classification label, confidence,
    explanation and per-model scores.
    """
    if not text or len(text.strip()) < 20:
        return {
            "ai_probability": 50.0,
            "classification": "Undetermined",
            "confidence": "Low",
            "explanation": "Text too short for accurate analysis. Please provide at least 50 characters.",
            "detailed_scores": {}
        }

    scores = []
    weights = []

    # RoBERTa OpenAI detector: highest weight, most accurate for ChatGPT.
    roberta_score = self.detect_roberta(text)
    if roberta_score is not None:
        scores.append(roberta_score)
        weights.append(0.4)

    # Alternative model contributes a second opinion.
    alt_score = self.detect_alternative(text)
    if alt_score is not None:
        scores.append(alt_score)
        weights.append(0.2)

    # GPT-2 perplexity-based likelihood.
    perplexity_score = self.calculate_gpt2_perplexity(text)
    if perplexity_score is not None:
        scores.append(perplexity_score)
        weights.append(0.15)

    # Statistical analysis always contributes.
    # (Fixed: the original appended `0.25 if len(scores) == 1 else 0.25`,
    # a redundant conditional whose branches were identical.)
    stat_score, stat_details = self.enhanced_statistical_analysis(text)
    scores.append(stat_score)
    weights.append(0.25)

    if scores:
        # Normalize weights so they sum to 1 regardless of which models loaded.
        total_weight = sum(weights)
        weights = [w / total_weight for w in weights]
        final_score = sum(s * w for s, w in zip(scores, weights))
    else:
        final_score = 0.5

    # Classification thresholds tuned for ChatGPT detection.
    if final_score >= 0.75:
        classification = "AI-Generated (Likely ChatGPT)"
        confidence = "High"
    elif final_score >= 0.55:
        classification = "Probably AI-Generated"
        confidence = "Medium-High"
    elif final_score >= 0.45:
        classification = "Uncertain (Mixed Signals)"
        confidence = "Low"
    elif final_score >= 0.25:
        classification = "Probably Human-Written"
        confidence = "Medium"
    else:
        classification = "Human-Written"
        confidence = "High"

    explanation = self._generate_explanation(final_score, stat_details, {
        'roberta': roberta_score,
        'alternative': alt_score,
        'perplexity': perplexity_score
    })

    return {
        "ai_probability": round(final_score * 100, 2),
        "classification": classification,
        "confidence": confidence,
        "explanation": explanation,
        "detailed_scores": stat_details,
        "model_scores": {
            'roberta_openai': roberta_score,
            'alternative': alt_score,
            'perplexity': perplexity_score,
            'statistical': stat_score
        }
    }
347
 
348
- def _generate_explanation(self, score, stat_details, model_scores):
349
- """Generate detailed explanation of the detection result"""
350
- explanations = []
351
 
352
  # Overall assessment
353
- if score >= 0.75:
354
- explanations.append("πŸ€– Strong indicators of AI generation detected, consistent with ChatGPT patterns.")
355
  elif score >= 0.55:
356
- explanations.append("⚠️ Multiple AI characteristics detected, suggesting probable AI generation.")
357
  elif score >= 0.45:
358
- explanations.append("❓ Mixed characteristics - could be AI-assisted or heavily edited human text.")
359
- elif score >= 0.25:
360
- explanations.append("✍️ Predominantly human characteristics with some regularities.")
361
  else:
362
- explanations.append("πŸ‘€ Strong human writing characteristics detected.")
363
 
364
- # Model-specific insights
365
- if model_scores.get('roberta') is not None:
366
- if model_scores['roberta'] > 0.7:
367
- explanations.append("\nβ€’ OpenAI detector: Strong AI signature")
368
- elif model_scores['roberta'] < 0.3:
369
- explanations.append("\nβ€’ OpenAI detector: Strong human signature")
 
 
 
370
 
371
- # Pattern analysis
372
  if stat_details.get('chatgpt_patterns', 0) > 0.5:
373
- explanations.append("\nβ€’ High density of ChatGPT-typical phrases and structures")
374
-
375
- if stat_details.get('sentence_uniformity', 0) > 0.7:
376
- explanations.append("\nβ€’ Unusually uniform sentence complexity (AI characteristic)")
377
- elif stat_details.get('sentence_uniformity', 0) < 0.3:
378
- explanations.append("\nβ€’ Variable sentence complexity (human characteristic)")
379
-
380
- if stat_details.get('zipf_correlation', 0) > 0.8:
381
- explanations.append("\nβ€’ Word frequency distribution closely follows Zipf's law (AI-like)")
382
 
383
- return " ".join(explanations)
 
 
 
 
 
 
 
384
 
385
  # Initialize detector
 
386
  detector = AdvancedAITextDetector()
387
 
388
  def analyze_text(text):
389
  """Gradio interface function"""
 
 
 
390
  result = detector.detect(text)
391
 
392
- # Format output for Gradio
393
- output = f"""
394
- ## πŸ” Detection Result
 
395
 
396
- **Classification:** {result['classification']}
397
- **AI Probability:** {result['ai_probability']}%
398
- **Confidence Level:** {result['confidence']}
399
 
400
- ### πŸ“Š Analysis Details
 
 
401
  {result['explanation']}
402
 
403
- ### πŸ“ˆ Model Scores
 
 
404
  """
405
 
406
  if result.get('model_scores'):
407
  for model, score in result['model_scores'].items():
408
  if score is not None:
409
- model_name = model.replace('_', ' ').title()
410
- output += f"- {model_name}: {round(score * 100, 2)}%\n"
411
-
412
- output += "\n### πŸ”¬ Statistical Metrics\n"
 
 
 
 
 
 
 
 
 
 
413
 
414
- if result['detailed_scores']:
415
- for metric, value in result['detailed_scores'].items():
416
- metric_name = metric.replace('_', ' ').title()
417
- percentage = round(value * 100, 1)
418
- output += f"- {metric_name}: {percentage}%\n"
 
 
 
 
 
 
 
 
 
419
 
420
- # Create visual probability bar
421
  ai_prob = result['ai_probability']
422
  human_prob = 100 - ai_prob
423
 
424
- bar_chart = f"""
425
- ### πŸ“Š Probability Distribution
 
 
 
 
426
  ```
427
- AI-Generated: {'β–ˆ' * int(ai_prob/5)}{'β–‘' * (20-int(ai_prob/5))} {ai_prob}%
428
- Human-Written: {'β–ˆ' * int(human_prob/5)}{'β–‘' * (20-int(human_prob/5))} {human_prob}%
429
  ```
430
  """
431
 
432
- # Add warning for edge cases
433
- if result['confidence'] == "Low":
434
- bar_chart += "\n⚠️ **Note:** Low confidence - results may be unreliable. Consider additional verification."
435
 
436
- return output + bar_chart
437
 
438
  # Create Gradio interface
439
  interface = gr.Interface(
440
  fn=analyze_text,
441
  inputs=gr.Textbox(
442
- lines=10,
443
- placeholder="Paste the text you want to analyze here...",
444
- label="Input Text"
445
  ),
446
- outputs=gr.Markdown(label="Analysis Result"),
447
- title="πŸ” Advanced ChatGPT & AI Text Detector",
448
  description="""
449
- This enhanced AI text detector uses state-of-the-art techniques specifically optimized for detecting ChatGPT and similar AI-generated content:
450
 
451
- ### πŸš€ Key Features:
452
- - **Multiple AI Detection Models** including OpenAI's RoBERTa detector
453
- - **GPT-2 Perplexity Analysis** to measure text predictability
454
- - **ChatGPT Pattern Recognition** detecting characteristic phrases and structures
455
- - **Advanced Statistical Analysis** including Zipf's law correlation and sentence uniformity
456
- - **Ensemble Method** combining multiple approaches for maximum accuracy
457
 
458
- ### πŸ“ Usage Tips:
459
- - Provide at least 100 words for best results
460
- - The detector is specifically tuned for ChatGPT/GPT-4 content
461
  - Works best with English text
462
- - Longer texts generally yield more reliable results
463
 
464
- ### ⚠️ Important:
465
- This tool provides probabilistic analysis, not absolute certainty. Use it as one of multiple factors in your assessment.
 
 
 
 
 
466
  """,
467
  examples=[
468
- ["The impact of artificial intelligence on modern society is profound and multifaceted. As we navigate this technological revolution, it's important to consider both the opportunities and challenges that AI presents. On one hand, AI systems are enhancing productivity, improving healthcare outcomes, and enabling new forms of creativity. On the other hand, concerns about job displacement, privacy, and algorithmic bias require careful consideration. Moving forward, it will be crucial for policymakers, technologists, and society as a whole to work together in shaping the development and deployment of AI in ways that benefit humanity while mitigating potential risks."],
469
- ["So I was walking down the street yesterday, right? And this crazy thing happened - I mean, you won't believe it. There was this dog, just a regular golden retriever, but it was wearing these ridiculous sunglasses. Like, who puts sunglasses on a dog? Anyway, the owner was this old lady, must've been like 80 or something, and she was just chatting away on her phone, completely oblivious. The dog looked so confused! I couldn't help but laugh. Sometimes you see the weirdest stuff when you're just out and about, you know? Made my whole day, honestly. Still cracks me up thinking about it."],
470
- ["Machine learning has revolutionized data analysis. Furthermore, deep learning algorithms have shown remarkable success in computer vision tasks. Additionally, natural language processing has made significant strides. It's worth noting that transformer architectures have been particularly influential. Moreover, these developments have practical applications across industries. In conclusion, the continued advancement of ML techniques promises further innovations."]
 
 
 
 
 
471
  ],
472
- theme=gr.themes.Soft(),
473
- analytics_enabled=False
 
 
 
 
 
474
  )
475
 
476
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import torch
3
+ import torch.nn.functional as F
4
+ from transformers import (
5
+ AutoTokenizer,
6
+ AutoModelForSequenceClassification,
7
+ GPT2LMHeadModel,
8
+ GPT2TokenizerFast,
9
+ BertTokenizer,
10
+ BertForSequenceClassification
11
+ )
12
  import numpy as np
 
13
  import re
14
  from collections import Counter
15
  import math
 
18
 
19
  class AdvancedAITextDetector:
20
def __init__(self):
    """Create the detector and load the full model ensemble.

    ``self.models`` / ``self.tokenizers`` are keyed by detector name and
    populated best-effort by ``load_all_models``.
    """
    # Prefer GPU when one is visible; everything else runs on CPU.
    use_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda" if use_cuda else "cpu")
    self.models = {}
    self.tokenizers = {}
    self.load_all_models()
26
 
27
def load_all_models(self):
    """Best-effort loading of the detector ensemble.

    Each loader is independent: failures are logged and skipped, so the
    detector degrades gracefully down to pure statistical analysis.
    Successful loads populate ``self.models`` / ``self.tokenizers``.
    """
    print("Loading detection models...")

    # Priority 1: specialized AI-content classifier.
    # Fixed: the original wrapped this in a second try whose
    # "unitary/unbiased-toxic-roberta" fallback assignment was dead code,
    # re-imported names already imported at module level, and swallowed
    # failures with a bare `except: pass`.
    try:
        model_name = "PirateXX/AI-Content-Detector"
        self.tokenizers['pirate'] = AutoTokenizer.from_pretrained(model_name)
        self.models['pirate'] = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.models['pirate'].to(self.device)
        self.models['pirate'].eval()
        print("βœ“ Loaded PirateXX AI detector")
    except Exception as e:
        print(f"Could not load priority model: {e}")

    # Priority 2: SimpleAI detectors (multilingual first, English fallback).
    try:
        model_name = "Hello-SimpleAI/chatgpt-detector-roberta-chinese"  # Multi-lingual tends to be better
        self.tokenizers['multilingual'] = AutoTokenizer.from_pretrained(model_name)
        self.models['multilingual'] = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.models['multilingual'].to(self.device)
        self.models['multilingual'].eval()
        print("βœ“ Loaded multilingual detector")
    except Exception:  # narrowed from bare `except:`
        try:
            # Fallback to English version
            model_name = "Hello-SimpleAI/chatgpt-detector-roberta"
            self.tokenizers['roberta_detector'] = AutoTokenizer.from_pretrained(model_name)
            self.models['roberta_detector'] = AutoModelForSequenceClassification.from_pretrained(model_name)
            self.models['roberta_detector'].to(self.device)
            self.models['roberta_detector'].eval()
            print("βœ“ Loaded SimpleAI ChatGPT detector")
        except Exception as e:
            print(f"Could not load SimpleAI detector: {e}")

    # Priority 3: OpenAI's RoBERTa detector.
    try:
        model_name = "roberta-base-openai-detector"
        self.tokenizers['openai'] = AutoTokenizer.from_pretrained(model_name)
        self.models['openai'] = AutoModelForSequenceClassification.from_pretrained(model_name)
        self.models['openai'].to(self.device)
        self.models['openai'].eval()
        print("βœ“ Loaded OpenAI RoBERTa detector")
    except Exception as e:
        print(f"Could not load OpenAI detector: {e}")

    # Priority 4: GPT-2 (medium preferred) for perplexity scoring.
    try:
        self.tokenizers['gpt2'] = GPT2TokenizerFast.from_pretrained("gpt2-medium")
        self.models['gpt2'] = GPT2LMHeadModel.from_pretrained("gpt2-medium")
        self.models['gpt2'].to(self.device)
        self.models['gpt2'].eval()
        # GPT-2 ships without a pad token; reuse EOS so padding works.
        self.tokenizers['gpt2'].pad_token = self.tokenizers['gpt2'].eos_token
        print("βœ“ Loaded GPT-2 Medium for perplexity")
    except Exception:  # narrowed from bare `except:`
        try:
            self.tokenizers['gpt2'] = GPT2TokenizerFast.from_pretrained("gpt2")
            self.models['gpt2'] = GPT2LMHeadModel.from_pretrained("gpt2")
            self.models['gpt2'].to(self.device)
            self.models['gpt2'].eval()
            self.tokenizers['gpt2'].pad_token = self.tokenizers['gpt2'].eos_token
            print("βœ“ Loaded GPT-2 for perplexity")
        except Exception as e:
            print(f"Could not load GPT-2: {e}")

    if not self.models:
        print("WARNING: No models loaded, using statistical methods only")
100
 
101
def calculate_perplexity(self, text):
    """Map GPT-2 perplexity of ``text`` onto an AI-likelihood in [0, 1].

    Low perplexity (very predictable text) is treated as evidence of AI
    authorship. Returns None when GPT-2 is unavailable or scoring fails.
    """
    if 'gpt2' not in self.models:
        return None

    try:
        tokenizer = self.tokenizers['gpt2']
        batch = tokenizer(
            text,
            return_tensors='pt',
            truncation=True,
            max_length=512,
            padding=True
        ).to(self.device)

        with torch.no_grad():
            result = self.models['gpt2'](**batch, labels=batch.input_ids)
            loss = result.loss
            ppl = torch.exp(loss).item()

        # Bucket the raw perplexity: < 30 leans AI, > 50 leans human.
        for upper_bound, ai_likelihood in ((20, 0.9), (30, 0.7), (50, 0.5), (100, 0.3)):
            if ppl < upper_bound:
                return ai_likelihood
        return 0.1

    except Exception as e:
        print(f"Perplexity calculation error: {e}")
        return None
136
 
137
def detect_with_model(self, text, model_name):
    """Score ``text`` with one loaded classifier.

    Returns P(AI) in [0, 1], or None when the model is missing or
    inference raises.
    """
    if model_name not in self.models:
        return None

    try:
        tokenized = self.tokenizers[model_name](
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True
        ).to(self.device)

        with torch.no_grad():
            logits = self.models[model_name](**tokenized).logits

        probabilities = F.softmax(logits, dim=-1)
        multi_class = probabilities.shape[1] > 1

        # Every supported checkpoint keeps the "AI"/"Fake" class at index 1
        # (for the OpenAI detector: 0=Real, 1=Fake), so the per-model
        # branches collapse to one rule.
        if model_name == 'openai' or multi_class:
            return probabilities[0][1].item()
        if model_name in ('roberta_detector', 'multilingual', 'pirate'):
            # Single-logit fallback for these detectors.
            return probabilities[0][0].item()
        # Unknown single-logit model: stay neutral.
        return 0.5

    except Exception as e:
        print(f"Error with {model_name}: {e}")
        return None
178
 
179
def advanced_linguistic_analysis(self, text):
    """Comprehensive linguistic analysis for AI detection.

    Extracts sentence-, n-gram-, phrase- and structure-level features
    and blends them into a single AI-likelihood.

    Returns:
        (score, features): ``score`` in [0, 1] (higher = more AI-like)
        and ``features`` mapping each raw feature name to its value.
    """
    scores = {}

    # 1. Sentence-level analysis
    sentences = [s.strip() for s in re.split(r'[.!?]+', text) if s.strip()]

    if len(sentences) > 1:
        # Sentence length variance (AI is more consistent)
        sent_lengths = [len(s.split()) for s in sentences]
        scores['sent_length_std'] = np.std(sent_lengths) / (np.mean(sent_lengths) + 1)

        # Sentence starter diversity (AI often starts sentences similarly)
        starters = [s.split()[0].lower() for s in sentences if s.split()]
        starter_diversity = len(set(starters)) / len(starters) if starters else 0
        scores['starter_diversity'] = starter_diversity

    # 2. N-gram analysis
    words = text.lower().split()

    if len(words) > 3:
        # Trigram repetition (AI repeats phrases more)
        trigrams = [tuple(words[i:i+3]) for i in range(len(words)-2)]
        trigram_counts = Counter(trigrams)
        repeated_trigrams = sum(1 for c in trigram_counts.values() if c > 1)
        scores['trigram_repetition'] = repeated_trigrams / len(trigrams) if trigrams else 0

        # Bigram diversity
        bigrams = [tuple(words[i:i+2]) for i in range(len(words)-1)]
        bigram_diversity = len(set(bigrams)) / len(bigrams) if bigrams else 0
        scores['bigram_diversity'] = bigram_diversity

    # 3. ChatGPT-specific patterns
    chatgpt_score = 0

    # Common ChatGPT phrases (weighted by specificity).
    # All entries must be lowercase because they are substring-matched
    # against text_lower; fixed: "I'll explain" was capitalized in the
    # original and therefore could never match.
    high_confidence_phrases = [
        "it's important to note", "it's worth noting", "it's crucial to",
        "in conclusion", "to summarize", "in summary",
        "let me explain", "let me break", "i'll explain",
        "here's a", "here are some", "this involves",
        "additionally", "furthermore", "moreover",
        "essentially", "basically", "fundamentally",
        "it's essential to", "remember that", "keep in mind"
    ]

    medium_confidence_phrases = [
        "however", "therefore", "thus", "hence",
        "for example", "for instance", "specifically",
        "generally", "typically", "usually", "often",
        "in other words", "that being said", "that said"
    ]

    text_lower = text.lower()

    # Check high confidence phrases
    for phrase in high_confidence_phrases:
        if phrase in text_lower:
            chatgpt_score += 0.15

    # Check medium confidence phrases
    for phrase in medium_confidence_phrases:
        if phrase in text_lower:
            chatgpt_score += 0.08

    # Check for structured lists (very common in ChatGPT)
    has_numbered = bool(re.search(r'\n\s*\d+[\.\)]\s', text))
    has_bullets = bool(re.search(r'\n\s*[-β€’*]\s', text))
    has_colons = text.count(':') > 2

    if has_numbered:
        chatgpt_score += 0.25
    if has_bullets:
        chatgpt_score += 0.20
    if has_colons:
        chatgpt_score += 0.10

    # Formal tone indicators
    formal_words = ['utilize', 'implement', 'facilitate', 'enhance', 'optimize',
                    'comprehensive', 'significant', 'substantial', 'various', 'numerous']
    formal_count = sum(1 for word in formal_words if word in text_lower)
    chatgpt_score += min(formal_count * 0.05, 0.25)

    scores['chatgpt_patterns'] = min(chatgpt_score, 1.0)

    # 4. Complexity uniformity (AI has uniform complexity)
    if len(sentences) > 2:
        complexities = []
        for sent in sentences:
            words_in_sent = sent.split()
            if words_in_sent:
                avg_word_len = np.mean([len(w) for w in words_in_sent])
                complexities.append(len(words_in_sent) * avg_word_len / 5)

        if complexities:
            cv = np.std(complexities) / (np.mean(complexities) + 1)
            scores['complexity_variance'] = cv

    # 5. Paragraph structure (AI has consistent paragraphs)
    paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]
    if len(paragraphs) > 1:
        para_lengths = [len(p.split()) for p in paragraphs]
        para_cv = np.std(para_lengths) / (np.mean(para_lengths) + 1)
        scores['paragraph_consistency'] = 1 - min(para_cv, 1.0)

    # Blend the features; negative weights reward LOW feature values
    # (e.g. low sentence-length spread is AI-like).
    weights = {
        'chatgpt_patterns': 0.35,
        'sent_length_std': -0.15,      # Lower std = more AI
        'starter_diversity': -0.10,    # Lower diversity = more AI
        'trigram_repetition': 0.15,
        'bigram_diversity': -0.10,
        'complexity_variance': -0.10,
        'paragraph_consistency': 0.15
    }

    final_score = 0.5  # Start neutral
    for feature, value in scores.items():
        if feature in weights:
            weight = weights[feature]
            if weight < 0:
                # Inverse relationship
                final_score += abs(weight) * (1 - value)
            else:
                final_score += weight * value

    return min(max(final_score, 0), 1), scores
308
 
309
  def detect(self, text):
310
+ """Main detection combining all methods"""
311
+ if not text or len(text.strip()) < 30:
312
  return {
313
  "ai_probability": 50.0,
314
+ "classification": "Text Too Short",
315
+ "confidence": "N/A",
316
+ "explanation": "Please provide at least 30 characters of text for analysis.",
317
  "detailed_scores": {}
318
  }
319
 
320
+ all_scores = []
321
+ all_weights = []
322
+ model_results = {}
323
+
324
+ # 1. Try each model
325
+ model_weights = {
326
+ 'pirate': 0.30, # If specialized detector available
327
+ 'openai': 0.25, # OpenAI's own detector
328
+ 'multilingual': 0.20, # Multilingual detector
329
+ 'roberta_detector': 0.20,
330
+ 'perplexity': 0.25
331
+ }
332
+
333
+ # Get model predictions
334
+ for model_name in ['pirate', 'openai', 'multilingual', 'roberta_detector']:
335
+ if model_name in self.models:
336
+ score = self.detect_with_model(text, model_name)
337
+ if score is not None:
338
+ all_scores.append(score)
339
+ all_weights.append(model_weights.get(model_name, 0.15))
340
+ model_results[model_name] = score
341
+
342
+ # Get perplexity score
343
+ perp_score = self.calculate_perplexity(text)
344
+ if perp_score is not None:
345
+ all_scores.append(perp_score)
346
+ all_weights.append(model_weights['perplexity'])
347
+ model_results['perplexity'] = perp_score
348
+
349
+ # 2. Statistical analysis
350
+ stat_score, stat_details = self.advanced_linguistic_analysis(text)
351
+ all_scores.append(stat_score)
352
+ all_weights.append(0.20)
353
+ model_results['statistical'] = stat_score
354
+
355
+ # 3. Calculate weighted final score
356
+ if all_scores:
357
  # Normalize weights
358
+ total_weight = sum(all_weights)
359
+ normalized_weights = [w/total_weight for w in all_weights]
360
+
361
+ # Weighted average
362
+ final_score = sum(s * w for s, w in zip(all_scores, normalized_weights))
363
+
364
+ # Boost score if multiple models agree strongly
365
+ agreement_scores = [s for s in all_scores if s > 0.7 or s < 0.3]
366
+ if len(agreement_scores) >= 2:
367
+ avg_agreement = np.mean(agreement_scores)
368
+ if avg_agreement > 0.7:
369
+ final_score = min(final_score * 1.1, 0.95)
370
+ elif avg_agreement < 0.3:
371
+ final_score = max(final_score * 0.9, 0.05)
372
  else:
373
  final_score = 0.5
374
 
375
+ # 4. Classification with better thresholds for ChatGPT
376
+ if final_score >= 0.70:
377
+ classification = "AI-Generated (High Confidence)"
378
+ confidence = "HIGH"
379
  elif final_score >= 0.55:
380
+ classification = "Likely AI-Generated"
381
+ confidence = "MEDIUM-HIGH"
382
  elif final_score >= 0.45:
383
+ classification = "Uncertain"
384
+ confidence = "LOW"
385
+ elif final_score >= 0.30:
386
+ classification = "Likely Human-Written"
387
+ confidence = "MEDIUM"
388
  else:
389
+ classification = "Human-Written (High Confidence)"
390
+ confidence = "HIGH"
391
 
392
+ # 5. Generate explanation
393
+ explanation = self._create_explanation(final_score, model_results, stat_details)
 
 
 
 
394
 
395
  return {
396
  "ai_probability": round(final_score * 100, 2),
397
  "classification": classification,
398
  "confidence": confidence,
399
  "explanation": explanation,
400
+ "model_scores": model_results,
401
+ "statistical_analysis": stat_details
 
 
 
 
 
402
  }
403
 
404
+ def _create_explanation(self, score, model_results, stat_details):
405
+ """Create detailed explanation"""
406
+ exp = []
407
 
408
  # Overall assessment
409
+ if score >= 0.70:
410
+ exp.append("πŸ€– STRONG AI INDICATORS: The text exhibits multiple characteristics typical of AI-generated content.")
411
  elif score >= 0.55:
412
+ exp.append("⚠️ PROBABLE AI: Several AI patterns detected, suggesting machine generation.")
413
  elif score >= 0.45:
414
+ exp.append("❓ INCONCLUSIVE: Mixed signals - could be AI-assisted or edited content.")
415
+ elif score >= 0.30:
416
+ exp.append("✍️ PROBABLE HUMAN: More human-like characteristics than AI patterns.")
417
  else:
418
+ exp.append("πŸ‘€ STRONG HUMAN INDICATORS: Text shows natural human writing patterns.")
419
 
420
+ # Model consensus
421
+ if model_results:
422
+ high_ai = [name for name, s in model_results.items() if s > 0.65]
423
+ high_human = [name for name, s in model_results.items() if s < 0.35]
424
+
425
+ if len(high_ai) >= 2:
426
+ exp.append(f"\n\nβœ“ Multiple models detect AI: {', '.join(high_ai)}")
427
+ elif len(high_human) >= 2:
428
+ exp.append(f"\n\nβœ“ Multiple models detect human writing: {', '.join(high_human)}")
429
 
430
+ # Specific indicators
431
  if stat_details.get('chatgpt_patterns', 0) > 0.5:
432
+ exp.append("\n\n⚑ High density of ChatGPT-style phrases and structures detected")
 
 
 
 
 
 
 
 
433
 
434
+ if stat_details.get('sent_length_std', 1) < 0.3:
435
+ exp.append("\nπŸ“ Unusually consistent sentence lengths (AI characteristic)")
436
+
437
+ if stat_details.get('trigram_repetition', 0) > 0.1:
438
+ exp.append("\nπŸ” Repeated phrase patterns detected")
439
+
440
+ return " ".join(exp)
441
+
442
 
443
# Initialize detector
# Single module-level instance created at import time; model loading happens
# once here and every Gradio request reuses it via analyze_text().
print("Initializing AI Text Detector...")
detector = AdvancedAITextDetector()
446
 
447
def analyze_text(text):
    """Gradio entry point: run detection on *text* and render a markdown report."""
    if not text:
        return "Please enter some text to analyze."

    result = detector.detect(text)

    def meter(pct):
        # 20-slot progress bar, one filled slot per 5 percentage points.
        filled = int(pct / 5)
        return 'β–ˆ' * filled + 'β–‘' * (20 - filled)

    # Friendly labels for the individual detector scores.
    display_names = {
        'openai': 'πŸ”· OpenAI Detector',
        'roberta_detector': 'πŸ€– RoBERTa ChatGPT',
        'multilingual': '🌍 Multilingual',
        'pirate': 'πŸ΄β€β˜ οΈ PirateXX',
        'perplexity': 'πŸ“Š Perplexity',
        'statistical': 'πŸ“ˆ Statistical'
    }

    # Header + summary section.
    report = f"""# πŸ” AI Detection Results

## **{result['classification']}**

### πŸ“Š AI Probability: **{result['ai_probability']}%**
### 🎯 Confidence: **{result['confidence']}**

---

## πŸ“ Analysis Summary
{result['explanation']}

---

## πŸ“ˆ Model Scores
"""

    # One bar line per detector that produced a score.
    if result.get('model_scores'):
        for name, value in result['model_scores'].items():
            if value is not None:
                pct = round(value * 100, 1)
                report += f"\n**{display_names.get(name, name)}:** {meter(pct)} {pct}%"

    # Linguistic feature breakdown, when available.
    if result.get('statistical_analysis'):
        report += "\n\n---\n\n## πŸ”¬ Detailed Linguistic Analysis\n"

        stats = result['statistical_analysis']

        if 'chatgpt_patterns' in stats:
            report += f"\n- **ChatGPT Pattern Score:** {stats['chatgpt_patterns']:.2f}/1.00"
        if 'sent_length_std' in stats:
            report += f"\n- **Sentence Variance:** {stats['sent_length_std']:.3f} (lower = more AI-like)"
        if 'trigram_repetition' in stats:
            report += f"\n- **Phrase Repetition:** {stats['trigram_repetition']:.3f}"
        if 'starter_diversity' in stats:
            report += f"\n- **Sentence Starter Diversity:** {stats['starter_diversity']:.3f}"

    # Side-by-side AI vs. human probability bars.
    ai_pct = result['ai_probability']
    human_pct = 100 - ai_pct

    report += f"""

---

## 🎯 Final Verdict

```
AI Generated: {meter(ai_pct)} {ai_pct:.1f}%
Human Written: {meter(human_pct)} {human_pct:.1f}%
```
"""

    # Disclaimer for uncertain verdicts.
    if result['confidence'] == "LOW":
        report += "\n\n⚠️ **Note:** Low confidence result. Consider getting human verification."

    return report
526
 
527
# Create Gradio interface
# NOTE(review): single Interface wiring analyze_text to a textbox input and a
# markdown output; launched by the __main__ guard at the end of the file.
interface = gr.Interface(
    fn=analyze_text,
    inputs=gr.Textbox(
        lines=12,
        placeholder="Paste text here to check if it's AI-generated...\n\nFor best results, provide at least 100 words.",
        label="Text to Analyze"
    ),
    outputs=gr.Markdown(label="Detection Results"),
    title="πŸš€ Advanced ChatGPT & AI Text Detector",
    description="""
    ## State-of-the-art AI text detection using multiple methods:

    ### πŸ”₯ Detection Methods:
    - **Multiple AI Detection Models** - Ensemble of specialized detectors
    - **Perplexity Analysis** - Measures text predictability (AI text is more predictable)
    - **Pattern Recognition** - Detects ChatGPT-specific writing patterns
    - **Linguistic Analysis** - Analyzes sentence structure, vocabulary, and style

    ### πŸ’‘ Best Practices:
    - Provide at least **100-200 words** for accurate detection
    - Longer texts generally give more reliable results
    - Works best with English text
    - Detection is probabilistic - use as guidance, not absolute proof

    ### 🎯 What This Detects:
    - ChatGPT (GPT-3.5/GPT-4)
    - Claude, Gemini, and other LLMs
    - AI-assisted or heavily edited content
    - Paraphrased AI content

    **Note:** No detector is 100% accurate. This tool provides sophisticated analysis but should be used alongside human judgment.
    """,
    # Three canned examples: one AI-generated, one human, one mixed/edited.
    examples=[
        # ChatGPT example
        ["Artificial intelligence has revolutionized numerous industries in recent years. It's important to note that this technology offers both opportunities and challenges. Machine learning algorithms can process vast amounts of data, identify patterns, and make predictions with remarkable accuracy. Furthermore, AI applications span various domains including healthcare, finance, and transportation. However, it's crucial to consider the ethical implications. Issues such as bias in algorithms, job displacement, and privacy concerns require careful consideration. Additionally, the development of AI must be guided by responsible practices. In conclusion, while AI presents tremendous potential for innovation and progress, we must approach its implementation thoughtfully and ethically."],

        # Human example
        ["So yesterday I'm at the coffee shop, right? And this guy next to me is having the LOUDEST phone conversation about his crypto investments. Like, dude, we get it, you bought Dogecoin. But here's the thing - he kept saying he was gonna be a millionaire by next week. Next week! I almost choked on my latte. The barista and I made eye contact and we both just tried not to laugh. I mean, good luck to him and all, but maybe don't count those chickens yet? Anyway, that's my coffee shop drama for the week. Still better than working from home where my cat judges me all day."],

        # Mixed/edited example
        ["The impact of social media on society has been profound. Studies show that people spend an average of 2.5 hours daily on social platforms. But honestly, I think it's probably way more than that - I know I'm constantly checking my phone! These platforms have transformed how we communicate, share information, and even how we see ourselves. There are definitely benefits, like staying connected with friends and family across distances. However, we're also seeing rises in anxiety and depression linked to social media use, especially among teenagers. It's a complex issue that deserves our attention."]
    ],
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="indigo",
        neutral_hue="slate"
    ),
    analytics_enabled=False,
    # Examples are not pre-computed; running them would load the models eagerly.
    cache_examples=False
)
578
 
579
  if __name__ == "__main__":