Spaces:
Sleeping
Sleeping
| import re | |
| import requests | |
| from typing import Dict, List, Tuple, Optional | |
| import torch | |
| from transformers import ( | |
| AutoTokenizer, | |
| AutoModelForSequenceClassification, | |
| RobertaTokenizer, | |
| RobertaForSequenceClassification | |
| ) | |
| import numpy as np | |
| from collections import Counter | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
| try: | |
| from plagiarism_detection import ai_plagiarism_detection | |
| DESKLIB_AVAILABLE = True | |
| except ImportError: | |
| DESKLIB_AVAILABLE = False | |
| print("Warning: plagiarism_detection module not found. Using fallback AI detection.") | |
| class AITextDetector: | |
| def __init__(self, device: str = None, threshold: float = 0.78): | |
| self.threshold = threshold | |
| if not DESKLIB_AVAILABLE: | |
| print("Warning: plagiarism_detection module not found. AI detection will not be available.") | |
| print("Ensure plagiarism_detection.py is in the same directory.") | |
| self.available = False | |
| else: | |
| print(f"Using Desklib AI text detector (threshold: {self.threshold})") | |
| self.available = True | |
| def detect_ai_text(self, text: str) -> Dict: | |
| if not self.available: | |
| # Return neutral result if Desklib not available | |
| return { | |
| 'ai_generated': False, | |
| 'confidence': 0.5, | |
| 'indicators': [], | |
| 'interpretation': "AI detection not available. Install plagiarism_detection module.", | |
| 'model_used': 'N/A (module not found)' | |
| } | |
| # Use Desklib AI detector | |
| try: | |
| probability, ai_detected = ai_plagiarism_detection( | |
| text, | |
| threshold=self.threshold, | |
| show_results=False | |
| ) | |
| return { | |
| 'ai_generated': ai_detected, | |
| 'confidence': float(probability), | |
| 'indicators': self._identify_ai_indicators(probability), | |
| 'interpretation': self._interpret_ai_detection(probability), | |
| 'model_used': 'Desklib AI Detector v1.01' | |
| } | |
| except Exception as e: | |
| print(f"Error in AI detection: {e}") | |
| return { | |
| 'ai_generated': False, | |
| 'confidence': 0.5, | |
| 'indicators': [], | |
| 'interpretation': f"AI detection error: {str(e)}", | |
| 'model_used': 'Error' | |
| } | |
| def _identify_ai_indicators(self, probability: float) -> List[str]: | |
| indicators = [] | |
| if probability > 0.9: | |
| indicators.append("Very high AI probability (>90%)") | |
| elif probability > 0.7: | |
| indicators.append("High AI probability (70-90%)") | |
| elif probability > self.threshold: | |
| indicators.append(f"AI detected above threshold ({self.threshold*100:.0f}%)") | |
| return indicators | |
| def _interpret_ai_detection(self, score: float) -> str: | |
| interpretation = f"**AI-Generated Text Detection:**\n\n" | |
| interpretation += f"- AI Probability Score: {score*100:.1f}%\n" | |
| interpretation += f"- Detection Threshold: {self.threshold*100:.0f}%\n" | |
| return interpretation | |
| class TextAuthenticityAnalyzer: | |
| def __init__(self, device: str = None, ai_threshold: float = 0.78): | |
| self.ai_detector = AITextDetector(device=device, threshold=ai_threshold) | |
| def analyze(self, text: str) -> Dict: | |
| # Run AI detection | |
| ai_results = self.ai_detector.detect_ai_text(text) | |
| # Calculate overall authenticity score based on AI detection | |
| ai_penalty = ai_results['confidence'] | |
| authenticity_score = 1.0 - ai_penalty | |
| # Determine overall assessment | |
| if authenticity_score < 0.3: | |
| overall_assessment = "HIGH RISK: Strong AI-generated text indicators" | |
| risk_level = "high" | |
| elif authenticity_score < 0.5: | |
| overall_assessment = "MODERATE RISK: Likely AI-generated" | |
| risk_level = "moderate" | |
| elif authenticity_score < 0.7: | |
| overall_assessment = "LOW RISK: Some AI characteristics" | |
| risk_level = "low" | |
| else: | |
| overall_assessment = "AUTHENTIC: Text appears human-written" | |
| risk_level = "minimal" | |
| return { | |
| 'authenticity_score': float(authenticity_score), | |
| 'risk_level': risk_level, | |
| 'overall_assessment': overall_assessment, | |
| 'ai_detection': ai_results, | |
| } | |
| if __name__ == "__main__": | |
| # Example usage | |
| analyzer = TextAuthenticityAnalyzer() | |
| print("Text authenticity analyzer initialized.") | |
| print("Components: Plagiarism Detector + AI Text Detector") | |