Authenticity / text_analyzer.py
ranamhamoud's picture
Upload folder using huggingface_hub
01b5c48 unverified
raw
history blame
4.79 kB
import re
import requests
from typing import Dict, List, Tuple, Optional
import torch
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification,
RobertaTokenizer,
RobertaForSequenceClassification
)
import numpy as np
from collections import Counter
import warnings
warnings.filterwarnings("ignore")
try:
from plagiarism_detection import ai_plagiarism_detection
DESKLIB_AVAILABLE = True
except ImportError:
DESKLIB_AVAILABLE = False
print("Warning: plagiarism_detection module not found. Using fallback AI detection.")
class AITextDetector:
def __init__(self, device: str = None, threshold: float = 0.78):
self.threshold = threshold
if not DESKLIB_AVAILABLE:
print("Warning: plagiarism_detection module not found. AI detection will not be available.")
print("Ensure plagiarism_detection.py is in the same directory.")
self.available = False
else:
print(f"Using Desklib AI text detector (threshold: {self.threshold})")
self.available = True
def detect_ai_text(self, text: str) -> Dict:
if not self.available:
# Return neutral result if Desklib not available
return {
'ai_generated': False,
'confidence': 0.5,
'indicators': [],
'interpretation': "AI detection not available. Install plagiarism_detection module.",
'model_used': 'N/A (module not found)'
}
# Use Desklib AI detector
try:
probability, ai_detected = ai_plagiarism_detection(
text,
threshold=self.threshold,
show_results=False
)
return {
'ai_generated': ai_detected,
'confidence': float(probability),
'indicators': self._identify_ai_indicators(probability),
'interpretation': self._interpret_ai_detection(probability),
'model_used': 'Desklib AI Detector v1.01'
}
except Exception as e:
print(f"Error in AI detection: {e}")
return {
'ai_generated': False,
'confidence': 0.5,
'indicators': [],
'interpretation': f"AI detection error: {str(e)}",
'model_used': 'Error'
}
def _identify_ai_indicators(self, probability: float) -> List[str]:
indicators = []
if probability > 0.9:
indicators.append("Very high AI probability (>90%)")
elif probability > 0.7:
indicators.append("High AI probability (70-90%)")
elif probability > self.threshold:
indicators.append(f"AI detected above threshold ({self.threshold*100:.0f}%)")
return indicators
def _interpret_ai_detection(self, score: float) -> str:
interpretation = f"**AI-Generated Text Detection:**\n\n"
interpretation += f"- AI Probability Score: {score*100:.1f}%\n"
interpretation += f"- Detection Threshold: {self.threshold*100:.0f}%\n"
return interpretation
class TextAuthenticityAnalyzer:
def __init__(self, device: str = None, ai_threshold: float = 0.78):
self.ai_detector = AITextDetector(device=device, threshold=ai_threshold)
def analyze(self, text: str) -> Dict:
# Run AI detection
ai_results = self.ai_detector.detect_ai_text(text)
# Calculate overall authenticity score based on AI detection
ai_penalty = ai_results['confidence']
authenticity_score = 1.0 - ai_penalty
# Determine overall assessment
if authenticity_score < 0.3:
overall_assessment = "HIGH RISK: Strong AI-generated text indicators"
risk_level = "high"
elif authenticity_score < 0.5:
overall_assessment = "MODERATE RISK: Likely AI-generated"
risk_level = "moderate"
elif authenticity_score < 0.7:
overall_assessment = "LOW RISK: Some AI characteristics"
risk_level = "low"
else:
overall_assessment = "AUTHENTIC: Text appears human-written"
risk_level = "minimal"
return {
'authenticity_score': float(authenticity_score),
'risk_level': risk_level,
'overall_assessment': overall_assessment,
'ai_detection': ai_results,
}
if __name__ == "__main__":
# Example usage
analyzer = TextAuthenticityAnalyzer()
print("Text authenticity analyzer initialized.")
print("Components: Plagiarism Detector + AI Text Detector")