Jay-Rajput committed on
Commit
f304cbc
·
1 Parent(s): 05aff20

ai detector enhanced

Browse files
Files changed (2) hide show
  1. app.py +471 -167
  2. requirements.txt +7 -6
app.py CHANGED
@@ -1,8 +1,7 @@
1
 
2
  """
3
- Advanced AI Text Detector - 4-Category Classification
4
- Enhanced accuracy with nuanced detection categories for Hugging Face Spaces
5
- Renamed to app.py for Hugging Face Spaces deployment
6
  """
7
 
8
  import gradio as gr
@@ -15,6 +14,11 @@ from typing import Dict, List, Tuple
15
  import statistics
16
  import string
17
  from collections import Counter
 
 
 
 
 
18
 
19
  class ImprovedAIDetector:
20
  """
@@ -39,9 +43,7 @@ class ImprovedAIDetector:
39
  self.model = None
40
 
41
  def extract_linguistic_features(self, text: str) -> Dict[str, float]:
42
- """
43
- Extract comprehensive linguistic features for detection
44
- """
45
  if len(text.strip()) < 10:
46
  return {}
47
 
@@ -90,7 +92,7 @@ class ImprovedAIDetector:
90
  ai_indicator_count = sum(1 for word in words if word.lower() in ai_indicators)
91
  features['ai_indicator_ratio'] = ai_indicator_count / len(words) if words else 0
92
 
93
- # Repetition patterns (AI tends to be more repetitive)
94
  bigrams = [(words[i].lower(), words[i+1].lower()) for i in range(len(words)-1)]
95
  unique_bigrams = len(set(bigrams))
96
  features['bigram_diversity'] = unique_bigrams / len(bigrams) if bigrams else 0
@@ -98,16 +100,12 @@ class ImprovedAIDetector:
98
  return features
99
 
100
  def calculate_perplexity_score(self, text: str) -> float:
101
- """
102
- Calculate a simplified perplexity-like score
103
- """
104
  if not self.model or not self.tokenizer:
105
- # Fallback heuristic
106
  words = text.split()
107
  if len(words) < 5:
108
  return 0.5
109
 
110
- # Simple heuristic: longer, more complex sentences = higher perplexity
111
  avg_word_length = np.mean([len(word) for word in words])
112
  sentence_count = len(re.split(r'[.!?]+', text))
113
  complexity_score = (avg_word_length * sentence_count) / len(words)
@@ -117,21 +115,16 @@ class ImprovedAIDetector:
117
  inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
118
  with torch.no_grad():
119
  outputs = self.model(**inputs)
120
- # Use model confidence as perplexity proxy
121
  probs = torch.softmax(outputs.logits, dim=-1)
122
  confidence = torch.max(probs).item()
123
- # Invert confidence to get perplexity-like score
124
  return 1.0 - confidence
125
  except:
126
  return 0.5
127
 
128
  def detect_refinement_patterns(self, text: str, linguistic_features: Dict) -> Dict[str, float]:
129
- """
130
- Detect patterns indicating AI refinement/editing
131
- """
132
  refinement_indicators = {}
133
 
134
- # Perfect grammar/structure indicators (suggests AI refinement)
135
  sentences = re.split(r'[.!?]+', text)
136
  sentences = [s.strip() for s in sentences if s.strip()]
137
 
@@ -148,21 +141,19 @@ class ImprovedAIDetector:
148
  formal_count = sum(1 for word in text.lower().split() if word in formal_words)
149
  refinement_indicators['formality_score'] = min(formal_count / len(text.split()) * 10, 1.0)
150
 
151
- # Check for lack of contractions (AI refinement often removes contractions)
152
  contractions = ["n't", "'ll", "'re", "'ve", "'m", "'d", "'s"]
153
  contraction_count = sum(1 for word in text.split() if any(cont in word for cont in contractions))
154
  words_count = len(text.split())
155
  refinement_indicators['contraction_absence'] = 1.0 - min(contraction_count / words_count * 5, 1.0) if words_count > 0 else 0.5
156
 
157
- # Check for overly perfect punctuation
158
  punct_perfect_score = 0.5
159
  if ',' in text and '.' in text:
160
- # Simple heuristic for punctuation correctness
161
  comma_count = text.count(',')
162
  period_count = text.count('.')
163
  if comma_count > 0 and period_count > 0:
164
  punct_ratio = comma_count / (comma_count + period_count)
165
- # Refined text often has more balanced punctuation
166
  if 0.3 <= punct_ratio <= 0.7:
167
  punct_perfect_score = 0.8
168
 
@@ -171,9 +162,7 @@ class ImprovedAIDetector:
171
  return refinement_indicators
172
 
173
  def classify_text_category(self, text: str) -> Tuple[str, Dict[str, float], float]:
174
- """
175
- Classify text into 4 categories with confidence scores
176
- """
177
  if len(text.strip()) < 10:
178
  return "Uncertain", {"ai_generated": 0.25, "ai_refined": 0.25, "human_ai_refined": 0.25, "human_written": 0.25}, 0.3
179
 
@@ -190,17 +179,16 @@ class ImprovedAIDetector:
190
  with torch.no_grad():
191
  outputs = self.model(**inputs)
192
  probs = torch.softmax(outputs.logits, dim=-1)
193
- transformer_ai_prob = probs[0][1].item() # AI probability
194
  except:
195
  pass
196
 
197
- # Calculate category probabilities using ensemble approach
198
  scores = {}
199
 
200
  # AI-generated score
201
  ai_generated_score = 0.0
202
  if linguistic_features:
203
- # AI tends to have: consistent sentence length, formal language, lower lexical diversity
204
  ai_generated_score = (
205
  transformer_ai_prob * 0.4 +
206
  (1.0 - linguistic_features.get('lexical_diversity', 0.5)) * 0.2 +
@@ -257,7 +245,7 @@ class ImprovedAIDetector:
257
 
258
  scores['human_written'] = min(max(human_written_score, 0.0), 1.0)
259
 
260
- # Normalize scores to sum to 1
261
  total_score = sum(scores.values())
262
  if total_score > 0:
263
  scores = {k: v / total_score for k, v in scores.items()}
@@ -278,73 +266,233 @@ class ImprovedAIDetector:
278
 
279
  return category_names[primary_category], scores, confidence
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  # Initialize detector
282
  detector = ImprovedAIDetector()
283
 
284
- def analyze_text(text):
285
- """
286
- Main analysis function for Gradio interface
287
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  if not text or len(text.strip()) < 10:
289
  return (
290
  "⚠️ Please provide at least 10 characters of text for accurate analysis.",
291
- 0.0, 0.0, 0.0, 0.0, # Four category scores
292
- 0.0, 0.0, # AI and Human probabilities
293
- 0.0, # Confidence
294
- "N/A" # Processing time
295
  )
296
 
297
  start_time = time.time()
298
 
299
  try:
300
- # Get detailed classification
301
  primary_category, category_scores, confidence = detector.classify_text_category(text)
302
 
303
- # Calculate traditional AI/Human probabilities
304
- ai_probability = category_scores['ai_generated'] + category_scores['ai_refined']
305
- human_probability = category_scores['human_ai_refined'] + category_scores['human_written']
306
 
307
  processing_time = (time.time() - start_time) * 1000
308
 
309
- # Format result message
310
- result_message = f"""
311
- ## 🎯 **{primary_category}**
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
- **Confidence:** {confidence:.1%}
 
 
 
 
 
 
 
 
 
314
 
315
- ### Category Breakdown:
316
- - **AI-generated:** {category_scores['ai_generated']:.1%}
317
- - **AI-generated & AI-refined:** {category_scores['ai_refined']:.1%}
318
- - **Human-written & AI-refined:** {category_scores['human_ai_refined']:.1%}
319
- - **Human-written:** {category_scores['human_written']:.1%}
320
 
321
- *Analysis completed in {processing_time:.0f}ms*
 
 
 
 
 
322
  """
323
 
324
  return (
325
- result_message,
326
- category_scores['ai_generated'],
327
- category_scores['ai_refined'],
328
- category_scores['human_ai_refined'],
329
- category_scores['human_written'],
330
- ai_probability,
331
- human_probability,
332
- confidence,
333
- f"{processing_time:.0f}ms"
334
  )
335
 
336
  except Exception as e:
337
  return (
338
  f"❌ Error during analysis: {str(e)}",
339
- 0.0, 0.0, 0.0, 0.0,
340
- 0.5, 0.5, 0.0,
341
  "Error"
342
  )
343
 
344
- def batch_analyze(file):
345
- """
346
- Analyze multiple texts from uploaded file
347
- """
348
  if file is None:
349
  return "Please upload a text file."
350
 
@@ -357,27 +505,36 @@ def batch_analyze(file):
357
 
358
  results = []
359
  category_counts = {'AI-generated': 0, 'AI-generated & AI-refined': 0, 'Human-written & AI-refined': 0, 'Human-written': 0}
 
360
 
361
- for i, text in enumerate(texts[:15]): # Limit to 15 texts for performance
362
  primary_category, category_scores, confidence = detector.classify_text_category(text)
363
  category_counts[primary_category] += 1
364
 
 
 
 
365
  results.append(f"""
366
- **Text {i+1}:** {text[:80]}{'...' if len(text) > 80 else ''}
367
- **Result:** {primary_category} ({confidence:.1%} confidence)
368
- **Breakdown:** AI-gen: {category_scores['ai_generated']:.0%}, AI-refined: {category_scores['ai_refined']:.0%}, Human+AI: {category_scores['human_ai_refined']:.0%}, Human: {category_scores['human_written']:.0%}
369
  """)
370
 
 
 
371
  summary = f"""
372
  ## πŸ“Š Batch Analysis Summary
373
 
374
- **Total texts analyzed:** {len(results)}
 
375
 
376
  ### Category Distribution:
377
- - **AI-generated:** {category_counts['AI-generated']} texts
378
- - **AI-generated & AI-refined:** {category_counts['AI-generated & AI-refined']} texts
379
- - **Human-written & AI-refined:** {category_counts['Human-written & AI-refined']} texts
380
- - **Human-written:** {category_counts['Human-written']} texts
 
 
381
 
382
  ### Individual Results:
383
  """
@@ -387,14 +544,18 @@ def batch_analyze(file):
387
  except Exception as e:
388
  return f"Error processing file: {str(e)}"
389
 
390
- # Create improved Gradio interface
 
 
 
 
391
  def create_improved_interface():
392
- """Create enhanced Gradio interface with 4-category classification"""
393
 
394
  custom_css = """
395
  .gradio-container {
396
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
397
- max-width: 1200px;
398
  margin: 0 auto;
399
  }
400
  .gr-button-primary {
@@ -402,29 +563,32 @@ def create_improved_interface():
402
  border: none;
403
  border-radius: 8px;
404
  font-weight: 600;
 
405
  }
406
  .gr-button-primary:hover {
407
  transform: translateY(-2px);
408
  box-shadow: 0 8px 25px rgba(102, 126, 234, 0.3);
409
  }
410
- .category-score {
411
- padding: 8px;
412
- margin: 4px;
413
- border-radius: 6px;
414
- border-left: 4px solid #667eea;
 
415
  }
416
  """
417
 
418
  with gr.Blocks(css=custom_css, title="Advanced AI Text Detector", theme=gr.themes.Soft()) as interface:
419
 
420
  gr.HTML("""
421
- <div style="text-align: center; padding: 20px; background: linear-gradient(45deg, #f0f2f6, #e8eaf6); border-radius: 12px; margin-bottom: 20px;">
422
- <h1 style="color: #2c3e50; margin-bottom: 10px;">πŸ” Advanced AI Text Detector</h1>
423
- <p style="font-size: 18px; color: #555; margin: 0;">
424
- Sophisticated 4-category classification for precise AI detection
 
425
  </p>
426
- <p style="font-size: 14px; color: #666; margin-top: 8px;">
427
- Detects pure AI content, AI-refined text, and human writing with enhanced accuracy
428
  </p>
429
  </div>
430
  """)
@@ -434,75 +598,93 @@ def create_improved_interface():
434
  # Single text analysis tab
435
  with gr.Tab("πŸ” Text Analysis", elem_id="single-analysis"):
436
  with gr.Row():
437
- with gr.Column(scale=3):
438
  text_input = gr.Textbox(
439
  label="πŸ“ Enter text to analyze",
440
  placeholder="Paste your text here (minimum 10 characters for accurate analysis)...",
441
- lines=8,
442
- max_lines=15,
443
  show_label=True
444
  )
445
 
446
  analyze_btn = gr.Button(
447
  "πŸš€ Analyze Text",
448
  variant="primary",
449
- size="lg",
450
- scale=1
451
  )
452
 
453
- with gr.Column(scale=2):
454
- result_output = gr.Markdown(
455
- label="πŸ“Š Analysis Results",
456
- value="Results will appear here after analysis..."
457
  )
458
 
459
- # Detailed metrics section
460
- gr.HTML("<hr style='margin: 20px 0;'><h3>πŸ“ˆ Detailed Metrics</h3>")
461
-
462
- with gr.Row():
463
- with gr.Column():
464
- ai_generated_score = gr.Number(
465
- label="πŸ€– AI-generated",
466
- precision=3,
467
- info="Text likely generated by AI, like ChatGPT or Gemini."
468
- )
469
- ai_refined_score = gr.Number(
470
- label="πŸ› οΈ AI-generated & AI-refined",
471
- precision=3,
472
- info="Text likely generated by AI, then refined or altered using AI tools."
473
  )
474
 
475
- with gr.Column():
476
- human_ai_refined_score = gr.Number(
477
- label="✍️ Human-written & AI-refined",
478
- precision=3,
479
- info="Text likely written by humans, then refined or altered using AI tools."
480
- )
481
- human_written_score = gr.Number(
482
- label="πŸ‘€ Human-written",
483
- precision=3,
484
- info="Text likely written by humans without the help of AI or paraphrasing tools."
485
  )
486
 
487
- with gr.Row():
488
- with gr.Column():
489
- ai_probability = gr.Number(label="🎯 Overall AI Probability", precision=3)
490
- human_probability = gr.Number(label="πŸ‘₯ Overall Human Probability", precision=3)
 
491
 
492
- with gr.Column():
493
- confidence_score = gr.Number(label="πŸ“Š Confidence Score", precision=3)
494
- processing_time = gr.Textbox(label="⚑ Processing Time", interactive=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
 
496
  # Batch analysis tab
497
  with gr.Tab("πŸ“„ Batch Analysis", elem_id="batch-analysis"):
498
  gr.HTML("""
499
- <div style="background: #f8f9fa; padding: 15px; border-radius: 8px; margin-bottom: 15px;">
500
- <h4>πŸ“‹ Instructions for Batch Analysis</h4>
501
- <ul>
502
- <li>Upload a <strong>.txt</strong> file with one text per line</li>
503
- <li>Each line should contain at least 10 characters</li>
504
- <li>Maximum 15 texts will be processed for performance</li>
505
- <li>Results include category distribution and individual analysis</li>
 
506
  </ul>
507
  </div>
508
  """)
@@ -516,6 +698,101 @@ def create_improved_interface():
516
  batch_analyze_btn = gr.Button("πŸ” Analyze Batch", variant="primary", size="lg")
517
  batch_results = gr.Markdown(label="πŸ“Š Batch Results")
518
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
  # About tab
520
  with gr.Tab("ℹ️ About", elem_id="about-tab"):
521
  gr.Markdown("""
@@ -523,7 +800,7 @@ def create_improved_interface():
523
 
524
  ## 🎯 Enhanced 4-Category Classification
525
 
526
- This advanced detector provides nuanced analysis beyond simple AI vs Human classification:
527
 
528
  ### πŸ“‹ Detection Categories
529
 
@@ -532,19 +809,21 @@ def create_improved_interface():
532
  3. **✍️ Human-written & AI-refined**: Human content enhanced or edited using AI tools
533
  4. **πŸ‘€ Human-written**: Pure human content without AI assistance
534
 
535
- ### πŸš€ Technical Improvements
536
 
 
537
  - **Multi-layered Analysis**: Combines transformer models with linguistic feature analysis
538
  - **Refinement Detection**: Identifies patterns indicating AI editing/enhancement
539
- - **Enhanced Accuracy**: Ensemble approach reduces false positives and false negatives
540
  - **Confidence Scoring**: Provides reliability measures for each prediction
 
541
 
542
- ### πŸ“Š Key Features
543
 
544
  - **Linguistic Feature Analysis**: Examines vocabulary diversity, sentence structure, punctuation patterns
545
  - **Refinement Pattern Detection**: Identifies signs of AI editing or enhancement
546
  - **Transformer Integration**: Uses fine-tuned RoBERTa models for baseline detection
547
  - **Ensemble Classification**: Combines multiple approaches for robust predictions
 
548
 
549
  ### 🎨 Use Cases
550
 
@@ -552,6 +831,7 @@ def create_improved_interface():
552
  - **Academic Integrity**: Detect AI assistance in student submissions
553
  - **Content Moderation**: Identify AI-generated content in social media
554
  - **Quality Assessment**: Understand the level of AI involvement in text creation
 
555
 
556
  ### ⚑ Performance Characteristics
557
 
@@ -559,41 +839,47 @@ def create_improved_interface():
559
  - **Processing Speed**: < 2 seconds for most texts
560
  - **Optimal Text Length**: 50+ words for best accuracy
561
  - **Language Support**: Optimized for English text
 
562
 
563
  ### πŸ”¬ Methodology
564
 
565
- The detector uses an ensemble approach combining:
566
- 1. Pre-trained transformer model predictions
567
- 2. Linguistic feature extraction and analysis
568
- 3. AI refinement pattern detection
569
- 4. Statistical text analysis
570
  5. Weighted scoring and normalization
571
 
572
- ### ⚠��� Limitations
573
 
574
  - Performance may vary with very short texts (< 50 words)
575
- - Heavily paraphrased content may be challenging to classify
576
- - Newer AI models may require periodic model updates
577
  - Non-English text may have reduced accuracy
 
578
 
579
  ### πŸ”„ Continuous Improvement
580
 
581
- This detector is regularly updated to adapt to new AI text generation techniques and improve accuracy across different content types.
 
 
 
 
 
 
 
 
582
  """)
583
 
584
  # Event handlers
585
  analyze_btn.click(
586
- fn=analyze_text,
587
  inputs=[text_input],
588
- outputs=[
589
- result_output,
590
- ai_generated_score, ai_refined_score, human_ai_refined_score, human_written_score,
591
- ai_probability, human_probability, confidence_score, processing_time
592
- ]
593
  )
594
 
595
  batch_analyze_btn.click(
596
- fn=batch_analyze,
597
  inputs=[file_input],
598
  outputs=[batch_results]
599
  )
@@ -601,24 +887,42 @@ def create_improved_interface():
601
  # Example texts
602
  gr.Examples(
603
  examples=[
604
- ["Artificial intelligence has revolutionized numerous industries through advanced machine learning algorithms that enable automated decision-making processes and enhanced operational efficiency across various sectors."],
605
- ["I can't believe how incredible this weekend trip was! We drove up to the mountains and the whole experience was just magical. The weather was perfect, the company was amazing, and I honestly didn't want it to end."],
606
- ["The implementation of sustainable energy solutions requires comprehensive analysis of environmental factors, economic considerations, and technological feasibility to ensure optimal outcomes for stakeholders."],
607
- ["Hey Sarah! Thanks for your email about the project timeline. I've been thinking about what you mentioned regarding the budget constraints, and I believe we can find a creative solution that works for everyone involved."]
608
  ],
609
  inputs=text_input,
610
- outputs=[
611
- result_output,
612
- ai_generated_score, ai_refined_score, human_ai_refined_score, human_written_score,
613
- ai_probability, human_probability, confidence_score, processing_time
614
- ],
615
- fn=analyze_text,
616
  cache_examples=False
617
  )
618
 
619
  return interface
620
 
621
- # Launch the improved interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
  if __name__ == "__main__":
623
  interface = create_improved_interface()
624
  interface.launch(
 
1
 
2
  """
3
+ Advanced AI Text Detector - Enhanced Results Display & API
4
+ 4-Category Classification with improved UX and JSON API support
 
5
  """
6
 
7
  import gradio as gr
 
14
  import statistics
15
  import string
16
  from collections import Counter
17
+ import json
18
+ import plotly.graph_objects as go
19
+ import plotly.express as px
20
+ from fastapi import FastAPI
21
+ from fastapi.middleware.cors import CORSMiddleware
22
 
23
  class ImprovedAIDetector:
24
  """
 
43
  self.model = None
44
 
45
  def extract_linguistic_features(self, text: str) -> Dict[str, float]:
46
+ """Extract comprehensive linguistic features for detection"""
 
 
47
  if len(text.strip()) < 10:
48
  return {}
49
 
 
92
  ai_indicator_count = sum(1 for word in words if word.lower() in ai_indicators)
93
  features['ai_indicator_ratio'] = ai_indicator_count / len(words) if words else 0
94
 
95
+ # Repetition patterns
96
  bigrams = [(words[i].lower(), words[i+1].lower()) for i in range(len(words)-1)]
97
  unique_bigrams = len(set(bigrams))
98
  features['bigram_diversity'] = unique_bigrams / len(bigrams) if bigrams else 0
 
100
  return features
101
 
102
  def calculate_perplexity_score(self, text: str) -> float:
103
+ """Calculate a simplified perplexity-like score"""
 
 
104
  if not self.model or not self.tokenizer:
 
105
  words = text.split()
106
  if len(words) < 5:
107
  return 0.5
108
 
 
109
  avg_word_length = np.mean([len(word) for word in words])
110
  sentence_count = len(re.split(r'[.!?]+', text))
111
  complexity_score = (avg_word_length * sentence_count) / len(words)
 
115
  inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
116
  with torch.no_grad():
117
  outputs = self.model(**inputs)
 
118
  probs = torch.softmax(outputs.logits, dim=-1)
119
  confidence = torch.max(probs).item()
 
120
  return 1.0 - confidence
121
  except:
122
  return 0.5
123
 
124
  def detect_refinement_patterns(self, text: str, linguistic_features: Dict) -> Dict[str, float]:
125
+ """Detect patterns indicating AI refinement/editing"""
 
 
126
  refinement_indicators = {}
127
 
 
128
  sentences = re.split(r'[.!?]+', text)
129
  sentences = [s.strip() for s in sentences if s.strip()]
130
 
 
141
  formal_count = sum(1 for word in text.lower().split() if word in formal_words)
142
  refinement_indicators['formality_score'] = min(formal_count / len(text.split()) * 10, 1.0)
143
 
144
+ # Check for lack of contractions
145
  contractions = ["n't", "'ll", "'re", "'ve", "'m", "'d", "'s"]
146
  contraction_count = sum(1 for word in text.split() if any(cont in word for cont in contractions))
147
  words_count = len(text.split())
148
  refinement_indicators['contraction_absence'] = 1.0 - min(contraction_count / words_count * 5, 1.0) if words_count > 0 else 0.5
149
 
150
+ # Check for punctuation patterns
151
  punct_perfect_score = 0.5
152
  if ',' in text and '.' in text:
 
153
  comma_count = text.count(',')
154
  period_count = text.count('.')
155
  if comma_count > 0 and period_count > 0:
156
  punct_ratio = comma_count / (comma_count + period_count)
 
157
  if 0.3 <= punct_ratio <= 0.7:
158
  punct_perfect_score = 0.8
159
 
 
162
  return refinement_indicators
163
 
164
  def classify_text_category(self, text: str) -> Tuple[str, Dict[str, float], float]:
165
+ """Classify text into 4 categories with confidence scores"""
 
 
166
  if len(text.strip()) < 10:
167
  return "Uncertain", {"ai_generated": 0.25, "ai_refined": 0.25, "human_ai_refined": 0.25, "human_written": 0.25}, 0.3
168
 
 
179
  with torch.no_grad():
180
  outputs = self.model(**inputs)
181
  probs = torch.softmax(outputs.logits, dim=-1)
182
+ transformer_ai_prob = probs[0][1].item()
183
  except:
184
  pass
185
 
186
+ # Calculate category probabilities
187
  scores = {}
188
 
189
  # AI-generated score
190
  ai_generated_score = 0.0
191
  if linguistic_features:
 
192
  ai_generated_score = (
193
  transformer_ai_prob * 0.4 +
194
  (1.0 - linguistic_features.get('lexical_diversity', 0.5)) * 0.2 +
 
245
 
246
  scores['human_written'] = min(max(human_written_score, 0.0), 1.0)
247
 
248
+ # Normalize scores
249
  total_score = sum(scores.values())
250
  if total_score > 0:
251
  scores = {k: v / total_score for k, v in scores.items()}
 
266
 
267
  return category_names[primary_category], scores, confidence
268
 
269
def get_analysis_json(self, text: str) -> Dict:
    """Return the analysis of ``text`` as a JSON-serialisable dict for the API.

    Method of ``ImprovedAIDetector``.

    Args:
        text: The text to classify. Must be a string with at least 10
            non-whitespace-trimmed characters.

    Returns:
        A dict with the keys ``ai_percentage``, ``human_percentage``,
        ``category_scores`` (``ai_generated``, ``ai_refined``,
        ``human_ai_refined``, ``human_written``), ``primary_category``,
        ``confidence`` and ``processing_time_ms``. Percentages and the
        confidence are scaled to 0-100 and rounded to one decimal place.
        On invalid input or on any failure during classification the dict
        additionally carries an ``error`` message, all numeric fields are 0,
        and ``primary_category`` is ``"uncertain"`` (invalid input) or
        ``"error"`` (classification failure). This method never raises for
        bad input; errors are always reported in the payload.
    """

    def _failure(message: str, category: str) -> Dict:
        # Single definition of the zero-valued payload shared by every
        # failure path, so the response shape cannot drift between them.
        return {
            "error": message,
            "ai_percentage": 0,
            "human_percentage": 0,
            "category_scores": {
                "ai_generated": 0,
                "ai_refined": 0,
                "human_ai_refined": 0,
                "human_written": 0,
            },
            "primary_category": category,
            "confidence": 0,
            "processing_time_ms": 0,
        }

    start_time = time.time()

    # A truthy non-string (e.g. a JSON number or list) has no .strip();
    # report it in the payload instead of letting AttributeError escape.
    if text and not isinstance(text, str):
        return _failure("Text must be a string", "uncertain")

    if not text or len(text.strip()) < 10:
        return _failure("Text must be at least 10 characters long", "uncertain")

    try:
        primary_category, category_scores, confidence = self.classify_text_category(text)

        # The two AI-leaning categories and the two human-leaning categories
        # are summed into the overall AI / human percentages.
        ai_percentage = (category_scores['ai_generated'] + category_scores['ai_refined']) * 100
        human_percentage = (category_scores['human_ai_refined'] + category_scores['human_written']) * 100

        processing_time = (time.time() - start_time) * 1000

        return {
            "ai_percentage": round(ai_percentage, 1),
            "human_percentage": round(human_percentage, 1),
            "category_scores": {
                "ai_generated": round(category_scores['ai_generated'] * 100, 1),
                "ai_refined": round(category_scores['ai_refined'] * 100, 1),
                "human_ai_refined": round(category_scores['human_ai_refined'] * 100, 1),
                "human_written": round(category_scores['human_written'] * 100, 1),
            },
            # Display name -> lowercase identifier with spaces/hyphens as "_".
            "primary_category": primary_category.lower().replace(' ', '_').replace('-', '_'),
            "confidence": round(confidence * 100, 1),
            "processing_time_ms": round(processing_time, 1),
        }

    except Exception as e:
        # API boundary: surface the failure in the payload rather than raising.
        return _failure(str(e), "error")
326
+
327
  # Initialize detector
328
  detector = ImprovedAIDetector()
329
 
330
def create_bar_chart(ai_percentage, human_percentage):
    """Build a vertical two-bar Plotly chart comparing AI vs Human share.

    Args:
        ai_percentage: Share attributed to AI, on a 0-100 scale (the y-axis
            range is fixed to [0, 100]).
        human_percentage: Share attributed to humans, on the same scale.

    Returns:
        A ``plotly.graph_objects.Figure`` with one bar per content type.
    """
    axis_title_font = dict(size=14, color='#34495e')
    axis_tick_font = dict(size=12, color='#34495e')

    fig = go.Figure(data=[
        go.Bar(
            x=['AI', 'Human'],
            y=[ai_percentage, human_percentage],
            marker=dict(
                color=['#FF6B6B', '#4ECDC4'],
                line=dict(color='rgba(0,0,0,0.3)', width=2)
            ),
            text=[f'{ai_percentage:.0f}%', f'{human_percentage:.0f}%'],
            textposition='auto',
            textfont=dict(size=14, color='white', family='Arial Black'),
            hovertemplate='<b>%{x}</b><br>%{y:.1f}%<extra></extra>'
        )
    ])

    fig.update_layout(
        title=dict(
            text='AI vs Human Content Distribution',
            x=0.5,
            font=dict(size=16, color='#2c3e50', family='Arial')
        ),
        # The axis title font is nested under `title`; the flat `titlefont`
        # property is deprecated and rejected by newer plotly releases.
        xaxis=dict(
            title=dict(text='Content Type', font=axis_title_font),
            tickfont=axis_tick_font
        ),
        yaxis=dict(
            title=dict(text='Percentage (%)', font=axis_title_font),
            tickfont=axis_tick_font,
            range=[0, 100]
        ),
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        showlegend=False,
        height=400,
        margin=dict(t=60, b=50, l=50, r=50)
    )

    # Figure exposes update_xaxes / update_yaxes (plural); the singular
    # spellings do not exist and raise AttributeError.
    fig.update_xaxes(showgrid=False, zeroline=False)
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='rgba(0,0,0,0.1)')

    return fig
376
+
377
def analyze_text_enhanced(text):
    """Analyze ``text`` and return the pieces rendered by the Gradio UI.

    Args:
        text: Text entered by the user. ``None`` and non-string values are
            treated like empty input.

    Returns:
        A 4-tuple ``(summary, chart, metrics_html, length_info)``:
        summary HTML (or a warning/error message), the Plotly bar chart
        (``None`` when no analysis was produced), the detailed-metrics HTML
        (``""`` when no analysis was produced), and a text-length string
        (the stripped character count for too-short input, ``"Error"`` when
        the analysis raised).
    """
    # Normalise once so a None/non-string value cannot raise on .strip().
    stripped = text.strip() if isinstance(text, str) else ""
    if len(stripped) < 10:
        return (
            "⚠️ Please provide at least 10 characters of text for accurate analysis.",
            None,  # Chart
            "",  # Metrics HTML
            f"{len(stripped)}"  # Text length
        )

    start_time = time.time()

    try:
        # Get analysis results
        primary_category, category_scores, confidence = detector.classify_text_category(text)

        # Overall percentages: the two AI-leaning categories vs the two
        # human-leaning categories.
        ai_percentage = (category_scores['ai_generated'] + category_scores['ai_refined']) * 100
        human_percentage = (category_scores['human_ai_refined'] + category_scores['human_written']) * 100

        processing_time = (time.time() - start_time) * 1000

        # Part 1: Summary score
        summary_html = f"""
        <div style="text-align: center; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white; padding: 30px; border-radius: 15px; margin: 20px 0; box-shadow: 0 8px 25px rgba(0,0,0,0.15);">
        <div style="font-size: 48px; font-weight: bold; margin-bottom: 10px; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
        {ai_percentage:.0f}%
        </div>
        <div style="font-size: 18px; line-height: 1.4; margin-bottom: 5px;">
        of this text is likely <strong>AI-generated or AI-refined</strong>
        </div>
        <div style="font-size: 14px; opacity: 0.9; font-style: italic;">
        (This score represents the percentage of words that are likely AI-generated or have been refined using AI tools.)
        </div>
        </div>
        """

        # Part 2: Bar chart
        bar_chart = create_bar_chart(ai_percentage, human_percentage)

        # Part 3: Detailed metrics HTML (one card per category)
        metrics_html = f"""
        <div style="margin: 20px 0; padding: 20px; background: #f8f9fa; border-radius: 12px; border-left: 5px solid #667eea;">
        <h4 style="color: #2c3e50; margin-bottom: 15px; font-size: 16px;">📊 Detailed Breakdown</h4>

        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 15px; margin-bottom: 20px;">

        <div style="background: white; padding: 15px; border-radius: 8px; border: 1px solid #e9ecef;">
        <div style="display: flex; align-items: center; margin-bottom: 8px;">
        <span style="font-size: 20px; margin-right: 8px;">🤖</span>
        <span style="font-weight: 600; color: #2c3e50;">AI-generated</span>
        <span title="Text likely generated by AI, like ChatGPT or Gemini." style="margin-left: 5px; cursor: help; color: #6c757d;">ⓘ</span>
        </div>
        <div style="font-size: 24px; font-weight: bold; color: #FF6B6B;">
        {category_scores['ai_generated']*100:.0f}%
        </div>
        </div>

        <div style="background: white; padding: 15px; border-radius: 8px; border: 1px solid #e9ecef;">
        <div style="display: flex; align-items: center; margin-bottom: 8px;">
        <span style="font-size: 20px; margin-right: 8px;">🛠️</span>
        <span style="font-weight: 600; color: #2c3e50;">AI-generated & AI-refined</span>
        <span title="Text likely generated by AI, then refined or altered using AI tools." style="margin-left: 5px; cursor: help; color: #6c757d;">ⓘ</span>
        </div>
        <div style="font-size: 24px; font-weight: bold; color: #FFA07A;">
        {category_scores['ai_refined']*100:.0f}%
        </div>
        </div>

        <div style="background: white; padding: 15px; border-radius: 8px; border: 1px solid #e9ecef;">
        <div style="display: flex; align-items: center; margin-bottom: 8px;">
        <span style="font-size: 20px; margin-right: 8px;">✍️</span>
        <span style="font-weight: 600; color: #2c3e50;">Human-written & AI-refined</span>
        <span title="Text likely written by humans, then refined or altered using AI tools." style="margin-left: 5px; cursor: help; color: #6c757d;">ⓘ</span>
        </div>
        <div style="font-size: 24px; font-weight: bold; color: #98D8C8;">
        {category_scores['human_ai_refined']*100:.0f}%
        </div>
        </div>

        <div style="background: white; padding: 15px; border-radius: 8px; border: 1px solid #e9ecef;">
        <div style="display: flex; align-items: center; margin-bottom: 8px;">
        <span style="font-size: 20px; margin-right: 8px;">👤</span>
        <span style="font-weight: 600; color: #2c3e50;">Human-written</span>
        <span title="Text likely written by humans without the help of AI or paraphrasing tools." style="margin-left: 5px; cursor: help; color: #6c757d;">ⓘ</span>
        </div>
        <div style="font-size: 24px; font-weight: bold; color: #4ECDC4;">
        {category_scores['human_written']*100:.0f}%
        </div>
        </div>

        </div>

        <div style="text-align: center; padding: 10px; background: white; border-radius: 8px; border: 1px solid #e9ecef;">
        <div style="font-size: 14px; color: #6c757d; margin-bottom: 5px;">Primary Classification</div>
        <div style="font-size: 18px; font-weight: bold; color: #2c3e50;">{primary_category}</div>
        <div style="font-size: 14px; color: #6c757d;">Confidence: {confidence*100:.0f}% | Processing: {processing_time:.0f}ms</div>
        </div>
        </div>
        """

        return (
            summary_html,
            bar_chart,
            metrics_html,
            f"Text length: {len(text)} characters, {len(text.split())} words"
        )

    except Exception as e:
        # UI boundary: show the failure in the result panel instead of raising.
        return (
            f"❌ Error during analysis: {str(e)}",
            None,
            "",
            "Error"
        )
493
 
494
+ def batch_analyze_enhanced(file):
495
+ """Enhanced batch analysis with improved formatting"""
 
 
496
  if file is None:
497
  return "Please upload a text file."
498
 
 
505
 
506
  results = []
507
  category_counts = {'AI-generated': 0, 'AI-generated & AI-refined': 0, 'Human-written & AI-refined': 0, 'Human-written': 0}
508
+ total_ai_percentage = 0
509
 
510
+ for i, text in enumerate(texts[:15]):
511
  primary_category, category_scores, confidence = detector.classify_text_category(text)
512
  category_counts[primary_category] += 1
513
 
514
+ ai_percentage = (category_scores['ai_generated'] + category_scores['ai_refined']) * 100
515
+ total_ai_percentage += ai_percentage
516
+
517
  results.append(f"""
518
+ **Text {i+1}:** {text[:80]}{'...' if len(text) > 80 else ''}
519
+ **Result:** {primary_category} ({confidence:.1%} confidence)
520
+ **AI Content:** {ai_percentage:.0f}% | **Breakdown:** AI-gen: {category_scores['ai_generated']:.0%}, AI-refined: {category_scores['ai_refined']:.0%}, Human+AI: {category_scores['human_ai_refined']:.0%}, Human: {category_scores['human_written']:.0%}
521
  """)
522
 
523
+ avg_ai_percentage = total_ai_percentage / len(results) if results else 0
524
+
525
  summary = f"""
526
  ## πŸ“Š Batch Analysis Summary
527
 
528
+ **Total texts analyzed:** {len(results)}
529
+ **Average AI content:** {avg_ai_percentage:.1f}%
530
 
531
  ### Category Distribution:
532
+ - **AI-generated:** {category_counts['AI-generated']} texts ({category_counts['AI-generated']/len(results)*100:.0f}%)
533
+ - **AI-generated & AI-refined:** {category_counts['AI-generated & AI-refined']} texts ({category_counts['AI-generated & AI-refined']/len(results)*100:.0f}%)
534
+ - **Human-written & AI-refined:** {category_counts['Human-written & AI-refined']} texts ({category_counts['Human-written & AI-refined']/len(results)*100:.0f}%)
535
+ - **Human-written:** {category_counts['Human-written']} texts ({category_counts['Human-written']/len(results)*100:.0f}%)
536
+
537
+ ---
538
 
539
  ### Individual Results:
540
  """
 
544
  except Exception as e:
545
  return f"Error processing file: {str(e)}"
546
 
547
+ # API endpoint for JSON results
548
+ def api_analyze_text(text: str) -> Dict:
549
+ """API endpoint that returns JSON results"""
550
+ return detector.get_analysis_json(text)
551
+
552
  def create_improved_interface():
553
+ """Create enhanced Gradio interface with improved results display"""
554
 
555
  custom_css = """
556
  .gradio-container {
557
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
558
+ max-width: 1400px;
559
  margin: 0 auto;
560
  }
561
  .gr-button-primary {
 
563
  border: none;
564
  border-radius: 8px;
565
  font-weight: 600;
566
+ padding: 12px 24px;
567
  }
568
  .gr-button-primary:hover {
569
  transform: translateY(-2px);
570
  box-shadow: 0 8px 25px rgba(102, 126, 234, 0.3);
571
  }
572
+ .understanding-section {
573
+ background: #f8f9fa;
574
+ border: 1px solid #e9ecef;
575
+ border-radius: 8px;
576
+ padding: 20px;
577
+ margin-top: 20px;
578
  }
579
  """
580
 
581
  with gr.Blocks(css=custom_css, title="Advanced AI Text Detector", theme=gr.themes.Soft()) as interface:
582
 
583
  gr.HTML("""
584
+ <div style="text-align: center; padding: 25px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
585
+ color: white; border-radius: 15px; margin-bottom: 25px; box-shadow: 0 10px 30px rgba(0,0,0,0.2);">
586
+ <h1 style="margin-bottom: 10px; font-size: 2.2em; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">🔍 Advanced AI Text Detector</h1>
587
+ <p style="font-size: 1.1em; margin: 0; opacity: 0.95;">
588
+ Sophisticated 4-category classification with enhanced accuracy and user-friendly results
589
  </p>
590
+ <p style="font-size: 0.9em; margin-top: 8px; opacity: 0.8;">
591
+ Detects pure AI content, AI-refined text, and human writing with detailed breakdowns
592
  </p>
593
  </div>
594
  """)
 
598
  # Single text analysis tab
599
  with gr.Tab("πŸ” Text Analysis", elem_id="single-analysis"):
600
  with gr.Row():
601
+ with gr.Column(scale=1):
602
  text_input = gr.Textbox(
603
  label="πŸ“ Enter text to analyze",
604
  placeholder="Paste your text here (minimum 10 characters for accurate analysis)...",
605
+ lines=10,
606
+ max_lines=20,
607
  show_label=True
608
  )
609
 
610
  analyze_btn = gr.Button(
611
  "πŸš€ Analyze Text",
612
  variant="primary",
613
+ size="lg"
 
614
  )
615
 
616
+ text_info = gr.Textbox(
617
+ label="📊 Text Information",
618
+ interactive=False,
619
+ show_label=True
620
  )
621
 
622
+ with gr.Column(scale=1):
623
+ # Part 1: Summary Score
624
+ summary_result = gr.HTML(
625
+ label="📊 Analysis Summary",
626
+ value="<div style='text-align: center; padding: 20px; color: #6c757d;'>Results will appear here after analysis...</div>"
 
 
 
 
 
 
 
 
 
627
  )
628
 
629
+ # Part 2: Bar Chart
630
+ bar_chart = gr.Plot(
631
+ label="📈 AI vs Human Distribution",
632
+ show_label=True
 
 
 
 
 
 
633
  )
634
 
635
+ # Part 2: Detailed Metrics
636
+ detailed_metrics = gr.HTML(
637
+ label="📋 Detailed Metrics",
638
+ value=""
639
+ )
640
 
641
+ # Part 3: Understanding Results (Collapsible)
642
+ with gr.Accordion("Understanding Your Results", open=False):
643
+ gr.HTML("""
644
+ <div style="padding: 20px; line-height: 1.6;">
645
+ <h4 style="color: #2c3e50; margin-bottom: 15px;">🎯 How to Interpret Your Results</h4>
646
+
647
+ <p><strong>Our AI detector estimates the likelihood that text was created or modified using AI tools.</strong>
648
+ The percentage shows our system's confidence, but it's not a definitive judgment.</p>
649
+
650
+ <h5 style="color: #34495e; margin-top: 20px; margin-bottom: 10px;">📋 Category Explanations:</h5>
651
+ <ul style="margin-left: 20px;">
652
+ <li><strong>🤖 AI-generated:</strong> Text that appears to be directly created by AI models like ChatGPT, GPT-4, or Gemini</li>
653
+ <li><strong>🛠️ AI-generated & AI-refined:</strong> AI-created text that has been further processed or polished using AI tools</li>
654
+ <li><strong>✍️ Human-written & AI-refined:</strong> Human-authored content that has been enhanced, edited, or refined using AI assistance</li>
655
+ <li><strong>👤 Human-written:</strong> Text that appears to be written entirely by humans without AI assistance</li>
656
+ </ul>
657
+
658
+ <h5 style="color: #34495e; margin-top: 20px; margin-bottom: 10px;">⚠️ Important Considerations:</h5>
659
+ <ul style="margin-left: 20px;">
660
+ <li><strong>Use your best judgment</strong> when reviewing results - AI detection is not 100% accurate</li>
661
+ <li><strong>Never rely solely on AI detection</strong> for decisions that could impact someone's career, academic standing, or reputation</li>
662
+ <li><strong>Consider context:</strong> Short texts (under 50 words) may be less reliable to classify</li>
663
+ <li><strong>False positives occur:</strong> Human text with formal language may sometimes be flagged as AI-generated</li>
664
+ <li><strong>Evolving technology:</strong> AI detection accuracy varies as both generation and detection methods improve</li>
665
+ </ul>
666
+
667
+ <div style="background: #fff3cd; border: 1px solid #ffeaa7; border-radius: 8px; padding: 15px; margin-top: 20px;">
668
+ <h5 style="color: #856404; margin-bottom: 10px;">💡 Best Practices:</h5>
669
+ <p style="margin: 0; color: #856404;">
670
+ Combine AI detection results with manual review, contextual knowledge, and other verification methods.
671
+ This tool should support—not replace—human judgment in content evaluation.
672
+ </p>
673
+ </div>
674
+ </div>
675
+ """)
676
 
677
  # Batch analysis tab
678
  with gr.Tab("πŸ“„ Batch Analysis", elem_id="batch-analysis"):
679
  gr.HTML("""
680
+ <div style="background: #e8f4fd; padding: 20px; border-radius: 12px; border-left: 5px solid #2196F3; margin-bottom: 20px;">
681
+ <h4 style="color: #1565C0; margin-bottom: 15px;">📋 Batch Analysis Instructions</h4>
682
+ <ul style="color: #1976D2; line-height: 1.6;">
683
+ <li>Upload a <strong>.txt</strong> file with one text sample per line</li>
684
+ <li>Each line should contain at least 10 characters for accurate analysis</li>
685
+ <li>Maximum 15 texts will be processed to ensure optimal performance</li>
686
+ <li>Results include category distribution, individual analysis, and summary statistics</li>
687
+ <li>Larger files may take longer to process - please be patient</li>
688
  </ul>
689
  </div>
690
  """)
 
698
  batch_analyze_btn = gr.Button("πŸ” Analyze Batch", variant="primary", size="lg")
699
  batch_results = gr.Markdown(label="πŸ“Š Batch Results")
700
 
701
+ # API Documentation tab
702
+ with gr.Tab("🔌 API Access", elem_id="api-docs"):
703
+ gr.Markdown("""
704
+ # 🔌 API Documentation
705
+
706
+ This detector provides a JSON API for programmatic access. Perfect for integrating AI detection into your own applications.
707
+
708
+ ## 📡 API Endpoint
709
+
710
+ **POST** `/api/analyze`
711
+
712
+ ```bash
713
+ curl -X POST "your-space-url/api/analyze" \
714
+ -H "Content-Type: application/json" \
715
+ -d '{"text": "Your text to analyze here"}'
716
+ ```
717
+
718
+ ## 📥 Request Format
719
+
720
+ ```json
721
+ {
722
+ "text": "The text you want to analyze for AI content detection"
723
+ }
724
+ ```
725
+
726
+ ## 📤 Response Format
727
+
728
+ ```json
729
+ {
730
+ "ai_percentage": 45.2,
731
+ "human_percentage": 54.8,
732
+ "category_scores": {
733
+ "ai_generated": 30.1,
734
+ "ai_refined": 15.1,
735
+ "human_ai_refined": 12.3,
736
+ "human_written": 42.5
737
+ },
738
+ "primary_category": "human_written",
739
+ "confidence": 85.7,
740
+ "processing_time_ms": 156.3
741
+ }
742
+ ```
743
+
744
+ ## 📋 Response Fields
745
+
746
+ - `ai_percentage`: Overall percentage of AI-generated or AI-refined content
747
+ - `human_percentage`: Overall percentage of human-written content
748
+ - `category_scores`: Breakdown of all 4 detection categories (percentages)
749
+ - `primary_category`: Most likely category for the text
750
+ - `confidence`: Confidence score for the primary category (0-100)
751
+ - `processing_time_ms`: Time taken to analyze the text in milliseconds
752
+
753
+ ## 🔧 Integration Examples
754
+
755
+ ### Python
756
+ ```python
757
+ import requests
758
+ import json
759
+
760
+ def analyze_text(text):
761
+ url = "your-space-url/api/analyze"
762
+ data = {"text": text}
763
+
764
+ response = requests.post(url, json=data)
765
+ return response.json()
766
+
767
+ result = analyze_text("Your text here")
768
+ print(f"AI Content: {result['ai_percentage']}%")
769
+ ```
770
+
771
+ ### JavaScript
772
+ ```javascript
773
+ async function analyzeText(text) {
774
+ const response = await fetch('your-space-url/api/analyze', {
775
+ method: 'POST',
776
+ headers: { 'Content-Type': 'application/json' },
777
+ body: JSON.stringify({ text: text })
778
+ });
779
+
780
+ return await response.json();
781
+ }
782
+
783
+ const result = await analyzeText("Your text here");
784
+ console.log(`AI Content: ${result.ai_percentage}%`);
785
+ ```
786
+
787
+ ## ⚠️ Usage Guidelines
788
+
789
+ - **Rate Limiting**: Please limit requests to avoid overloading the system
790
+ - **Text Length**: Minimum 10 characters, optimal 50+ words for best accuracy
791
+ - **Language**: Optimized for English text
792
+ - **Reliability**: Use results as guidance, not absolute truth
793
+
794
+ """)
795
+
796
  # About tab
797
  with gr.Tab("ℹ️ About", elem_id="about-tab"):
798
  gr.Markdown("""
 
800
 
801
  ## 🎯 Enhanced 4-Category Classification
802
 
803
+ This advanced detector provides nuanced analysis beyond simple AI vs Human classification, offering detailed insights into different types of AI involvement in text creation.
804
 
805
  ### πŸ“‹ Detection Categories
806
 
 
809
  3. **✍️ Human-written & AI-refined**: Human content enhanced or edited using AI tools
810
  4. **πŸ‘€ Human-written**: Pure human content without AI assistance
811
 
812
+ ### 🚀 Key Improvements
813
 
814
+ - **Enhanced Results Display**: Clear percentage summary, visual bar chart, and detailed breakdowns
815
  - **Multi-layered Analysis**: Combines transformer models with linguistic feature analysis
816
  - **Refinement Detection**: Identifies patterns indicating AI editing/enhancement
 
817
  - **Confidence Scoring**: Provides reliability measures for each prediction
818
+ - **JSON API**: Programmatic access for integration with other applications
819
 
820
+ ### 📊 Technical Features
821
 
822
  - **Linguistic Feature Analysis**: Examines vocabulary diversity, sentence structure, punctuation patterns
823
  - **Refinement Pattern Detection**: Identifies signs of AI editing or enhancement
824
  - **Transformer Integration**: Uses fine-tuned RoBERTa models for baseline detection
825
  - **Ensemble Classification**: Combines multiple approaches for robust predictions
826
+ - **Real-time Processing**: Fast analysis with sub-second response times
827
 
828
  ### 🎨 Use Cases
829
 
 
831
  - **Academic Integrity**: Detect AI assistance in student submissions
832
  - **Content Moderation**: Identify AI-generated content in social media
833
  - **Quality Assessment**: Understand the level of AI involvement in text creation
834
+ - **Research & Development**: Analyze AI text patterns for research purposes
835
 
836
  ### ⚑ Performance Characteristics
837
 
 
839
  - **Processing Speed**: < 2 seconds for most texts
840
  - **Optimal Text Length**: 50+ words for best accuracy
841
  - **Language Support**: Optimized for English text
842
+ - **API Response**: JSON format for easy integration
843
 
844
  ### πŸ”¬ Methodology
845
 
846
+ The detector uses a sophisticated ensemble approach:
847
+ 1. Pre-trained transformer model predictions (RoBERTa-based)
848
+ 2. Linguistic feature extraction and analysis (31+ features)
849
+ 3. AI refinement pattern detection (editing signatures)
850
+ 4. Statistical text analysis (perplexity, complexity)
851
  5. Weighted scoring and normalization
852
 
853
+ ### ⚠️ Limitations & Considerations
854
 
855
  - Performance may vary with very short texts (< 50 words)
856
+ - Heavily paraphrased content may be challenging to classify accurately
857
+ - Newer AI models may require periodic detector updates
858
  - Non-English text may have reduced accuracy
859
+ - False positives can occur with highly formal human writing
860
 
861
  ### πŸ”„ Continuous Improvement
862
 
863
+ This detector is regularly updated to:
864
+ - Adapt to new AI text generation techniques
865
+ - Improve accuracy across different content types
866
+ - Enhance user experience and result interpretation
867
+ - Expand language support and domain coverage
868
+
869
+ ---
870
+
871
+ **Version**: 2.0.0 | **Updated**: September 2025 | **Model**: RoBERTa-base-openai-detector
872
  """)
873
 
874
  # Event handlers
875
  analyze_btn.click(
876
+ fn=analyze_text_enhanced,
877
  inputs=[text_input],
878
+ outputs=[summary_result, bar_chart, detailed_metrics, text_info]
 
 
 
 
879
  )
880
 
881
  batch_analyze_btn.click(
882
+ fn=batch_analyze_enhanced,
883
  inputs=[file_input],
884
  outputs=[batch_results]
885
  )
 
887
  # Example texts
888
  gr.Examples(
889
  examples=[
890
+ ["Artificial intelligence has revolutionized numerous industries through advanced machine learning algorithms that enable automated decision-making processes and enhanced operational efficiency across various sectors. These technological innovations have transformed traditional workflows and created new opportunities for businesses to optimize their operations."],
891
+ ["I can't believe how incredible this weekend trip was! We drove up to the mountains and the whole experience was just magical. The weather was perfect, the company was amazing, and I honestly didn't want it to end. There's something about being in nature that just makes everything feel right, you know?"],
892
+ ["The implementation of sustainable energy solutions requires comprehensive analysis of environmental factors, economic considerations, and technological feasibility to ensure optimal outcomes for stakeholders. Organizations must carefully evaluate various renewable energy options before making strategic investment decisions."],
893
+ ["Hey Sarah! Thanks for your email about the project timeline. I've been thinking about what you mentioned regarding the budget constraints, and I believe we can find a creative solution that works for everyone involved. Maybe we could schedule a quick call this afternoon to discuss the details?"]
894
  ],
895
  inputs=text_input,
896
+ outputs=[summary_result, bar_chart, detailed_metrics, text_info],
897
+ fn=analyze_text_enhanced,
 
 
 
 
898
  cache_examples=False
899
  )
900
 
901
  return interface
902
 
903
+ # Create FastAPI app for API endpoints
904
+ app = FastAPI(title="AI Text Detector API", version="2.0.0")
905
+
906
+ app.add_middleware(
907
+ CORSMiddleware,
908
+ allow_origins=["*"],
909
+ allow_credentials=True,
910
+ allow_methods=["*"],
911
+ allow_headers=["*"],
912
+ )
913
+
914
+ @app.post("/api/analyze")
915
+ async def analyze_api(request: dict):
916
+ """API endpoint for text analysis"""
917
+ text = request.get("text", "")
918
+ return api_analyze_text(text)
919
+
920
+ @app.get("/api/health")
921
+ async def health_check():
922
+ """Health check endpoint"""
923
+ return {"status": "healthy", "version": "2.0.0"}
924
+
925
+ # Launch the interface
926
  if __name__ == "__main__":
927
  interface = create_improved_interface()
928
  interface.launch(
requirements.txt CHANGED
@@ -1,7 +1,8 @@
1
- torch
2
- transformers
3
  gradio>=4.0.0
4
- numpy
5
- datasets
6
- tokenizers
7
- accelerate
 
 
 
 
 
 
1
  gradio>=4.0.0
2
+ torch>=1.13.0
3
+ transformers>=4.25.0
4
+ numpy>=1.21.0
5
+ scikit-learn>=1.2.0
6
+ plotly>=5.0.0
7
+ fastapi>=0.68.0
8
+ uvicorn>=0.15.0