Jay-Rajput committed on
Commit
399c3c0
·
1 Parent(s): f304cbc

ai detector enhanced

Browse files
Files changed (1) hide show
  1. app.py +36 -153
app.py CHANGED
@@ -1,7 +1,8 @@
1
 
2
  """
3
- Advanced AI Text Detector - Enhanced Results Display & API
4
  4-Category Classification with improved UX and JSON API support
 
5
  """
6
 
7
  import gradio as gr
@@ -17,8 +18,6 @@ from collections import Counter
17
  import json
18
  import plotly.graph_objects as go
19
  import plotly.express as px
20
- from fastapi import FastAPI
21
- from fastapi.middleware.cors import CORSMiddleware
22
 
23
  class ImprovedAIDetector:
24
  """
@@ -328,7 +327,7 @@ class ImprovedAIDetector:
328
  detector = ImprovedAIDetector()
329
 
330
  def create_bar_chart(ai_percentage, human_percentage):
331
- """Create vertical bar chart showing AI vs Human percentages"""
332
 
333
  fig = go.Figure(data=[
334
  go.Bar(
@@ -345,6 +344,7 @@ def create_bar_chart(ai_percentage, human_percentage):
345
  )
346
  ])
347
 
 
348
  fig.update_layout(
349
  title=dict(
350
  text='AI vs Human Content Distribution',
@@ -352,15 +352,24 @@ def create_bar_chart(ai_percentage, human_percentage):
352
  font=dict(size=16, color='#2c3e50', family='Arial')
353
  ),
354
  xaxis=dict(
355
- title='Content Type',
356
- titlefont=dict(size=14, color='#34495e'),
357
- tickfont=dict(size=12, color='#34495e')
 
 
 
 
358
  ),
359
  yaxis=dict(
360
- title='Percentage (%)',
361
- titlefont=dict(size=14, color='#34495e'),
 
 
362
  tickfont=dict(size=12, color='#34495e'),
363
- range=[0, 100]
 
 
 
364
  ),
365
  plot_bgcolor='rgba(0,0,0,0)',
366
  paper_bgcolor='rgba(0,0,0,0)',
@@ -369,9 +378,6 @@ def create_bar_chart(ai_percentage, human_percentage):
369
  margin=dict(t=60, b=50, l=50, r=50)
370
  )
371
 
372
- fig.update_xaxis(showgrid=False, zeroline=False)
373
- fig.update_yaxis(showgrid=True, gridwidth=1, gridcolor='rgba(0,0,0,0.1)')
374
-
375
  return fig
376
 
377
  def analyze_text_enhanced(text):
@@ -381,7 +387,7 @@ def analyze_text_enhanced(text):
381
  "⚠️ Please provide at least 10 characters of text for accurate analysis.",
382
  None, # Chart
383
  "", # Metrics HTML
384
- f"{len(text.strip())}" # Text length
385
  )
386
 
387
  start_time = time.time()
@@ -569,13 +575,6 @@ def create_improved_interface():
569
  transform: translateY(-2px);
570
  box-shadow: 0 8px 25px rgba(102, 126, 234, 0.3);
571
  }
572
- .understanding-section {
573
- background: #f8f9fa;
574
- border: 1px solid #e9ecef;
575
- border-radius: 8px;
576
- padding: 20px;
577
- margin-top: 20px;
578
- }
579
  """
580
 
581
  with gr.Blocks(css=custom_css, title="Advanced AI Text Detector", theme=gr.themes.Soft()) as interface:
@@ -639,7 +638,7 @@ def create_improved_interface():
639
  )
640
 
641
  # Part 3: Understanding Results (Collapsible)
642
- with gr.Accordion("Understanding Your Results", open=False):
643
  gr.HTML("""
644
  <div style="padding: 20px; line-height: 1.6;">
645
  <h4 style="color: #2c3e50; margin-bottom: 15px;">🎯 How to Interpret Your Results</h4>
@@ -667,8 +666,9 @@ def create_improved_interface():
667
  <div style="background: #fff3cd; border: 1px solid #ffeaa7; border-radius: 8px; padding: 15px; margin-top: 20px;">
668
  <h5 style="color: #856404; margin-bottom: 10px;">💡 Best Practices:</h5>
669
  <p style="margin: 0; color: #856404;">
670
- Combine AI detection results with manual review, contextual knowledge, and other verification methods.
671
- This tool should support—not replace—human judgment in content evaluation.
 
672
  </p>
673
  </div>
674
  </div>
@@ -698,107 +698,12 @@ def create_improved_interface():
698
  batch_analyze_btn = gr.Button("🔍 Analyze Batch", variant="primary", size="lg")
699
  batch_results = gr.Markdown(label="📊 Batch Results")
700
 
701
- # API Documentation tab
702
- with gr.Tab("🔌 API Access", elem_id="api-docs"):
703
- gr.Markdown("""
704
- # 🔌 API Documentation
705
-
706
- This detector provides a JSON API for programmatic access. Perfect for integrating AI detection into your own applications.
707
-
708
- ## 📡 API Endpoint
709
-
710
- **POST** `/api/analyze`
711
-
712
- ```bash
713
- curl -X POST "your-space-url/api/analyze" \
714
- -H "Content-Type: application/json" \
715
- -d '{"text": "Your text to analyze here"}'
716
- ```
717
-
718
- ## 📥 Request Format
719
-
720
- ```json
721
- {
722
- "text": "The text you want to analyze for AI content detection"
723
- }
724
- ```
725
-
726
- ## 📤 Response Format
727
-
728
- ```json
729
- {
730
- "ai_percentage": 45.2,
731
- "human_percentage": 54.8,
732
- "category_scores": {
733
- "ai_generated": 30.1,
734
- "ai_refined": 15.1,
735
- "human_ai_refined": 12.3,
736
- "human_written": 42.5
737
- },
738
- "primary_category": "human_written",
739
- "confidence": 85.7,
740
- "processing_time_ms": 156.3
741
- }
742
- ```
743
-
744
- ## 📋 Response Fields
745
-
746
- - `ai_percentage`: Overall percentage of AI-generated or AI-refined content
747
- - `human_percentage`: Overall percentage of human-written content
748
- - `category_scores`: Breakdown of all 4 detection categories (percentages)
749
- - `primary_category`: Most likely category for the text
750
- - `confidence`: Confidence score for the primary category (0-100)
751
- - `processing_time_ms`: Time taken to analyze the text in milliseconds
752
-
753
- ## 🔧 Integration Examples
754
-
755
- ### Python
756
- ```python
757
- import requests
758
- import json
759
-
760
- def analyze_text(text):
761
- url = "your-space-url/api/analyze"
762
- data = {"text": text}
763
-
764
- response = requests.post(url, json=data)
765
- return response.json()
766
-
767
- result = analyze_text("Your text here")
768
- print(f"AI Content: {result['ai_percentage']}%")
769
- ```
770
-
771
- ### JavaScript
772
- ```javascript
773
- async function analyzeText(text) {
774
- const response = await fetch('your-space-url/api/analyze', {
775
- method: 'POST',
776
- headers: { 'Content-Type': 'application/json' },
777
- body: JSON.stringify({ text: text })
778
- });
779
-
780
- return await response.json();
781
- }
782
-
783
- const result = await analyzeText("Your text here");
784
- console.log(`AI Content: ${result.ai_percentage}%`);
785
- ```
786
-
787
- ## ⚠️ Usage Guidelines
788
-
789
- - **Rate Limiting**: Please limit requests to avoid overloading the system
790
- - **Text Length**: Minimum 10 characters, optimal 50+ words for best accuracy
791
- - **Language**: Optimized for English text
792
- - **Reliability**: Use results as guidance, not absolute truth
793
-
794
- """)
795
-
796
  # About tab
797
  with gr.Tab("ℹ️ About", elem_id="about-tab"):
798
  gr.Markdown("""
799
  # 🔍 Advanced AI Text Detector
800
 
801
- ## 🎯 Enhanced 4-Category Classification
802
 
803
  This advanced detector provides nuanced analysis beyond simple AI vs Human classification, offering detailed insights into different types of AI involvement in text creation.
804
 
@@ -809,13 +714,13 @@ def create_improved_interface():
809
  3. **✍️ Human-written & AI-refined**: Human content enhanced or edited using AI tools
810
  4. **👤 Human-written**: Pure human content without AI assistance
811
 
812
- ### 🚀 Key Improvements
813
 
814
  - **Enhanced Results Display**: Clear percentage summary, visual bar chart, and detailed breakdowns
815
  - **Multi-layered Analysis**: Combines transformer models with linguistic feature analysis
816
  - **Refinement Detection**: Identifies patterns indicating AI editing/enhancement
817
  - **Confidence Scoring**: Provides reliability measures for each prediction
818
- - **JSON API**: Programmatic access for integration with other applications
819
 
820
  ### 📊 Technical Features
821
 
@@ -839,18 +744,18 @@ def create_improved_interface():
839
  - **Processing Speed**: < 2 seconds for most texts
840
  - **Optimal Text Length**: 50+ words for best accuracy
841
  - **Language Support**: Optimized for English text
842
- - **API Response**: JSON format for easy integration
843
 
844
- ### 🔬 Methodology
845
 
846
  The detector uses a sophisticated ensemble approach:
847
- 1. Pre-trained transformer model predictions (RoBERTa-based)
848
- 2. Linguistic feature extraction and analysis (31+ features)
849
- 3. AI refinement pattern detection (editing signatures)
850
- 4. Statistical text analysis (perplexity, complexity)
851
- 5. Weighted scoring and normalization
852
 
853
- ### ⚠️ Limitations & Considerations
854
 
855
  - Performance may vary with very short texts (< 50 words)
856
  - Heavily paraphrased content may be challenging to classify accurately
@@ -868,7 +773,7 @@ def create_improved_interface():
868
 
869
  ---
870
 
871
- **Version**: 2.0.0 | **Updated**: September 2025 | **Model**: RoBERTa-base-openai-detector
872
  """)
873
 
874
  # Event handlers
@@ -900,28 +805,6 @@ def create_improved_interface():
900
 
901
  return interface
902
 
903
- # Create FastAPI app for API endpoints
904
- app = FastAPI(title="AI Text Detector API", version="2.0.0")
905
-
906
- app.add_middleware(
907
- CORSMiddleware,
908
- allow_origins=["*"],
909
- allow_credentials=True,
910
- allow_methods=["*"],
911
- allow_headers=["*"],
912
- )
913
-
914
- @app.post("/api/analyze")
915
- async def analyze_api(request: dict):
916
- """API endpoint for text analysis"""
917
- text = request.get("text", "")
918
- return api_analyze_text(text)
919
-
920
- @app.get("/api/health")
921
- async def health_check():
922
- """Health check endpoint"""
923
- return {"status": "healthy", "version": "2.0.0"}
924
-
925
  # Launch the interface
926
  if __name__ == "__main__":
927
  interface = create_improved_interface()
 
1
 
2
  """
3
+ Advanced AI Text Detector - Enhanced Results Display & API (FIXED)
4
  4-Category Classification with improved UX and JSON API support
5
+ Fixed Plotly compatibility issues
6
  """
7
 
8
  import gradio as gr
 
18
  import json
19
  import plotly.graph_objects as go
20
  import plotly.express as px
 
 
21
 
22
  class ImprovedAIDetector:
23
  """
 
327
  detector = ImprovedAIDetector()
328
 
329
  def create_bar_chart(ai_percentage, human_percentage):
330
+ """Create vertical bar chart showing AI vs Human percentages - FIXED VERSION"""
331
 
332
  fig = go.Figure(data=[
333
  go.Bar(
 
344
  )
345
  ])
346
 
347
+ # FIXED: Use correct Plotly syntax for layout
348
  fig.update_layout(
349
  title=dict(
350
  text='AI vs Human Content Distribution',
 
352
  font=dict(size=16, color='#2c3e50', family='Arial')
353
  ),
354
  xaxis=dict(
355
+ title=dict(
356
+ text='Content Type',
357
+ font=dict(size=14, color='#34495e')
358
+ ),
359
+ tickfont=dict(size=12, color='#34495e'),
360
+ showgrid=False,
361
+ zeroline=False
362
  ),
363
  yaxis=dict(
364
+ title=dict(
365
+ text='Percentage (%)',
366
+ font=dict(size=14, color='#34495e')
367
+ ),
368
  tickfont=dict(size=12, color='#34495e'),
369
+ range=[0, 100],
370
+ showgrid=True,
371
+ gridwidth=1,
372
+ gridcolor='rgba(0,0,0,0.1)'
373
  ),
374
  plot_bgcolor='rgba(0,0,0,0)',
375
  paper_bgcolor='rgba(0,0,0,0)',
 
378
  margin=dict(t=60, b=50, l=50, r=50)
379
  )
380
 
 
 
 
381
  return fig
382
 
383
  def analyze_text_enhanced(text):
 
387
  "⚠️ Please provide at least 10 characters of text for accurate analysis.",
388
  None, # Chart
389
  "", # Metrics HTML
390
+ f"Text length: {len(text.strip())} characters" # Text length
391
  )
392
 
393
  start_time = time.time()
 
575
  transform: translateY(-2px);
576
  box-shadow: 0 8px 25px rgba(102, 126, 234, 0.3);
577
  }
 
 
 
 
 
 
 
578
  """
579
 
580
  with gr.Blocks(css=custom_css, title="Advanced AI Text Detector", theme=gr.themes.Soft()) as interface:
 
638
  )
639
 
640
  # Part 3: Understanding Results (Collapsible)
641
+ with gr.Accordion("📚 Understanding Your Results", open=False):
642
  gr.HTML("""
643
  <div style="padding: 20px; line-height: 1.6;">
644
  <h4 style="color: #2c3e50; margin-bottom: 15px;">🎯 How to Interpret Your Results</h4>
 
666
  <div style="background: #fff3cd; border: 1px solid #ffeaa7; border-radius: 8px; padding: 15px; margin-top: 20px;">
667
  <h5 style="color: #856404; margin-bottom: 10px;">💡 Best Practices:</h5>
668
  <p style="margin: 0; color: #856404;">
669
+ Our AI detector flags text that may be AI-generated. Use your best judgment when reviewing results.
670
+ Never rely on AI detection alone to make decisions that could impact someone's career or academic standing.
671
+ Combine AI detection results with manual review, contextual knowledge, and other verification methods.
672
  </p>
673
  </div>
674
  </div>
 
698
  batch_analyze_btn = gr.Button("🔍 Analyze Batch", variant="primary", size="lg")
699
  batch_results = gr.Markdown(label="📊 Batch Results")
700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
701
  # About tab
702
  with gr.Tab("ℹ️ About", elem_id="about-tab"):
703
  gr.Markdown("""
704
  # 🔍 Advanced AI Text Detector
705
 
706
+ ## 🎯 Enhanced 4-Category Classification System
707
 
708
  This advanced detector provides nuanced analysis beyond simple AI vs Human classification, offering detailed insights into different types of AI involvement in text creation.
709
 
 
714
  3. **✍️ Human-written & AI-refined**: Human content enhanced or edited using AI tools
715
  4. **👤 Human-written**: Pure human content without AI assistance
716
 
717
+ ### 🚀 Key Improvements & Features
718
 
719
  - **Enhanced Results Display**: Clear percentage summary, visual bar chart, and detailed breakdowns
720
  - **Multi-layered Analysis**: Combines transformer models with linguistic feature analysis
721
  - **Refinement Detection**: Identifies patterns indicating AI editing/enhancement
722
  - **Confidence Scoring**: Provides reliability measures for each prediction
723
+ - **User-Friendly Interface**: Professional design optimized for clarity and understanding
724
 
725
  ### 📊 Technical Features
726
 
 
744
  - **Processing Speed**: < 2 seconds for most texts
745
  - **Optimal Text Length**: 50+ words for best accuracy
746
  - **Language Support**: Optimized for English text
747
+ - **Response Format**: Clear visual results with explanations
748
 
749
+ ### 🔬 Detection Methodology
750
 
751
  The detector uses a sophisticated ensemble approach:
752
+ 1. **Pre-trained transformer predictions** (RoBERTa-based)
753
+ 2. **Linguistic feature extraction** (31+ features analyzed)
754
+ 3. **AI refinement pattern detection** (editing signatures)
755
+ 4. **Statistical text analysis** (perplexity, complexity)
756
+ 5. **Weighted scoring and normalization**
757
 
758
+ ### ⚠️ Important Limitations
759
 
760
  - Performance may vary with very short texts (< 50 words)
761
  - Heavily paraphrased content may be challenging to classify accurately
 
773
 
774
  ---
775
 
776
+ **Version**: 2.0.1 | **Updated**: September 2025 | **Status**: Production Ready
777
  """)
778
 
779
  # Event handlers
 
805
 
806
  return interface
807
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
808
  # Launch the interface
809
  if __name__ == "__main__":
810
  interface = create_improved_interface()