# NOTE: file-viewer residue captured together with this source; not part of the module.
#   Spaces: Sleeping | File size: 4,790 Bytes | 01b5c48 (appears to be a revision hash)
#   (A line-number gutter listing 1-138 followed here in the capture.)
import re
import requests
from typing import Dict, List, Tuple, Optional
import torch
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification,
RobertaTokenizer,
RobertaForSequenceClassification
)
import numpy as np
from collections import Counter
import warnings
warnings.filterwarnings("ignore")
# Optional dependency: the detector function lives in a local
# ``plagiarism_detection`` module. DESKLIB_AVAILABLE records whether the
# import succeeded so the rest of the file can degrade gracefully.
try:
    from plagiarism_detection import ai_plagiarism_detection
    DESKLIB_AVAILABLE = True
except ImportError:
    # Keep the name bound so any later reference fails predictably instead of
    # raising NameError.
    ai_plagiarism_detection = None
    DESKLIB_AVAILABLE = False
    # No alternative detector exists in this file, so the message says the
    # feature is unavailable rather than promising a fallback.
    print("Warning: plagiarism_detection module not found. AI detection will be unavailable.")
class AITextDetector:
    """Wrap ``ai_plagiarism_detection`` and normalise its output into a dict.

    When the ``plagiarism_detection`` module could not be imported
    (``DESKLIB_AVAILABLE`` is false), the detector stays usable but only
    returns a neutral placeholder result.
    """

    def __init__(self, device: Optional[str] = None, threshold: float = 0.78):
        """Create a detector.

        Args:
            device: Accepted for interface compatibility; this class does not
                use it.
            threshold: Decision threshold forwarded to the detector function
                and quoted in the generated report text.
        """
        self.threshold = threshold
        # Availability is decided once, from the module-level import guard.
        self.available = DESKLIB_AVAILABLE
        if self.available:
            print(f"Using Desklib AI text detector (threshold: {self.threshold})")
        else:
            print("Warning: plagiarism_detection module not found. AI detection will not be available.")
            print("Ensure plagiarism_detection.py is in the same directory.")

    def detect_ai_text(self, text: str) -> Dict:
        """Score ``text`` and return a result dictionary.

        Returns:
            A dict with the keys ``ai_generated`` (bool), ``confidence``
            (float), ``indicators`` (list of str), ``interpretation`` (str)
            and ``model_used`` (str). If the detector is unavailable or the
            call fails, a neutral placeholder is returned
            (``ai_generated=False``, ``confidence=0.5``) whose ``model_used``
            is ``'N/A (module not found)'`` or ``'Error'`` respectively.
        """
        if not self.available:
            # Neutral placeholder: no detection was performed.
            return {
                'ai_generated': False,
                'confidence': 0.5,
                'indicators': [],
                'interpretation': "AI detection not available. Install plagiarism_detection module.",
                'model_used': 'N/A (module not found)'
            }

        try:
            probability, ai_detected = ai_plagiarism_detection(
                text,
                threshold=self.threshold,
                show_results=False
            )
            return {
                # Coerce to a plain bool, mirroring the float() coercion
                # below, so the result holds only built-in types.
                'ai_generated': bool(ai_detected),
                'confidence': float(probability),
                'indicators': self._identify_ai_indicators(probability),
                'interpretation': self._interpret_ai_detection(probability),
                'model_used': 'Desklib AI Detector v1.01'
            }
        except Exception as e:
            # Best-effort boundary: report the failure and hand back the
            # neutral placeholder instead of propagating.
            print(f"Error in AI detection: {e}")
            return {
                'ai_generated': False,
                'confidence': 0.5,
                'indicators': [],
                'interpretation': f"AI detection error: {str(e)}",
                'model_used': 'Error'
            }

    def _identify_ai_indicators(self, probability: float) -> List[str]:
        """Return at most one human-readable indicator for ``probability``.

        The fixed 0.9 and 0.7 bands are checked before the configured
        threshold, so the threshold message can only appear when
        ``self.threshold`` is below 0.7.
        """
        if probability > 0.9:
            return ["Very high AI probability (>90%)"]
        if probability > 0.7:
            return ["High AI probability (70-90%)"]
        if probability > self.threshold:
            return [f"AI detected above threshold ({self.threshold*100:.0f}%)"]
        return []

    def _interpret_ai_detection(self, score: float) -> str:
        """Build the Markdown summary quoting the score and the threshold."""
        lines = [
            "**AI-Generated Text Detection:**\n",
            f"- AI Probability Score: {score*100:.1f}%",
            f"- Detection Threshold: {self.threshold*100:.0f}%",
        ]
        # Every line, including the last, is newline-terminated.
        return "\n".join(lines) + "\n"
class TextAuthenticityAnalyzer:
    """Turn an AI-detection result into an authenticity score and risk label."""

    # ``model_used`` values that mark a placeholder result, i.e. no detection
    # actually ran (module missing, or the detector raised).
    _PLACEHOLDER_MODELS = ('N/A (module not found)', 'Error')

    def __init__(self, device: Optional[str] = None, ai_threshold: float = 0.78):
        """Create the analyzer and its underlying ``AITextDetector``.

        Args:
            device: Forwarded unchanged to ``AITextDetector``.
            ai_threshold: Forwarded to ``AITextDetector`` as ``threshold``.
        """
        self.ai_detector = AITextDetector(device=device, threshold=ai_threshold)

    def analyze(self, text: str) -> Dict:
        """Analyse ``text`` and summarise how likely it is to be human-written.

        Returns:
            A dict with ``authenticity_score`` (``1 - confidence`` as float),
            ``risk_level`` (``"high"``, ``"moderate"``, ``"low"``,
            ``"minimal"``, or ``"unknown"`` when detection did not run),
            ``overall_assessment`` (str) and ``ai_detection`` (the raw
            detector result).
        """
        ai_results = self.ai_detector.detect_ai_text(text)

        # The detector's confidence is the AI probability, so authenticity is
        # its complement.
        authenticity_score = 1.0 - ai_results['confidence']

        if ai_results.get('model_used') in self._PLACEHOLDER_MODELS:
            # The 0.5 confidence here is a placeholder, not a measurement, so
            # do not present it as a risk finding about the text.
            overall_assessment = "UNAVAILABLE: AI detection could not be run"
            risk_level = "unknown"
        elif authenticity_score < 0.3:
            overall_assessment = "HIGH RISK: Strong AI-generated text indicators"
            risk_level = "high"
        elif authenticity_score < 0.5:
            overall_assessment = "MODERATE RISK: Likely AI-generated"
            risk_level = "moderate"
        elif authenticity_score < 0.7:
            overall_assessment = "LOW RISK: Some AI characteristics"
            risk_level = "low"
        else:
            overall_assessment = "AUTHENTIC: Text appears human-written"
            risk_level = "minimal"

        return {
            'authenticity_score': float(authenticity_score),
            'risk_level': risk_level,
            'overall_assessment': overall_assessment,
            'ai_detection': ai_results,
        }
if __name__ == "__main__":
    # Smoke run: build the analyzer with its defaults and confirm start-up.
    # NOTE(review): the second message mentions a plagiarism detector, but the
    # analyzer visible in this file only wires up the AI text detector.
    analyzer = TextAuthenticityAnalyzer()
    for startup_message in (
        "Text authenticity analyzer initialized.",
        "Components: Plagiarism Detector + AI Text Detector",
    ):
        print(startup_message)
|