Spaces:

ranamhamoud
/

Authenticity

Sleeping

File size: 4,790 Bytes

01b5c48

import re
import requests
from typing import Dict, List, Tuple, Optional
import torch
from transformers import (
    AutoTokenizer, 
    AutoModelForSequenceClassification,
    RobertaTokenizer,
    RobertaForSequenceClassification
)
import numpy as np
from collections import Counter
import warnings
warnings.filterwarnings("ignore")

try:
    from plagiarism_detection import ai_plagiarism_detection
    DESKLIB_AVAILABLE = True
except ImportError:
    DESKLIB_AVAILABLE = False
    print("Warning: plagiarism_detection module not found. Using fallback AI detection.")




class AITextDetector:
    def __init__(self, device: str = None, threshold: float = 0.78):
        self.threshold = threshold
        
        if not DESKLIB_AVAILABLE:
            print("Warning: plagiarism_detection module not found. AI detection will not be available.")
            print("Ensure plagiarism_detection.py is in the same directory.")
            self.available = False
        else:
            print(f"Using Desklib AI text detector (threshold: {self.threshold})")
            self.available = True
    
    def detect_ai_text(self, text: str) -> Dict:

        if not self.available:
            # Return neutral result if Desklib not available
            return {
                'ai_generated': False,
                'confidence': 0.5,
                'indicators': [],
                'interpretation': "AI detection not available. Install plagiarism_detection module.",
                'model_used': 'N/A (module not found)'
            }
        
        # Use Desklib AI detector
        try:
            probability, ai_detected = ai_plagiarism_detection(
                text, 
                threshold=self.threshold, 
                show_results=False
            )
            
            return {
                'ai_generated': ai_detected,
                'confidence': float(probability),
                'indicators': self._identify_ai_indicators(probability),
                'interpretation': self._interpret_ai_detection(probability),
                'model_used': 'Desklib AI Detector v1.01'
            }
        except Exception as e:
            print(f"Error in AI detection: {e}")
            return {
                'ai_generated': False,
                'confidence': 0.5,
                'indicators': [],
                'interpretation': f"AI detection error: {str(e)}",
                'model_used': 'Error'
            }
    
    
    def _identify_ai_indicators(self, probability: float) -> List[str]:
        indicators = []
        
        if probability > 0.9:
            indicators.append("Very high AI probability (>90%)")
        elif probability > 0.7:
            indicators.append("High AI probability (70-90%)")
        elif probability > self.threshold:
            indicators.append(f"AI detected above threshold ({self.threshold*100:.0f}%)")
        
        return indicators
    
    def _interpret_ai_detection(self, score: float) -> str:
        interpretation = f"**AI-Generated Text Detection:**\n\n"
        interpretation += f"- AI Probability Score: {score*100:.1f}%\n"
        interpretation += f"- Detection Threshold: {self.threshold*100:.0f}%\n"
        
        return interpretation


class TextAuthenticityAnalyzer:

    def __init__(self, device: str = None, ai_threshold: float = 0.78):

        self.ai_detector = AITextDetector(device=device, threshold=ai_threshold)
        
    def analyze(self, text: str) -> Dict:
        # Run AI detection
        ai_results = self.ai_detector.detect_ai_text(text)
        
        # Calculate overall authenticity score based on AI detection
        ai_penalty = ai_results['confidence']
        authenticity_score = 1.0 - ai_penalty
        
        # Determine overall assessment
        if authenticity_score < 0.3:
            overall_assessment = "HIGH RISK: Strong AI-generated text indicators"
            risk_level = "high"
        elif authenticity_score < 0.5:
            overall_assessment = "MODERATE RISK: Likely AI-generated"
            risk_level = "moderate"
        elif authenticity_score < 0.7:
            overall_assessment = "LOW RISK: Some AI characteristics"
            risk_level = "low"
        else:
            overall_assessment = "AUTHENTIC: Text appears human-written"
            risk_level = "minimal"
        
        return {
            'authenticity_score': float(authenticity_score),
            'risk_level': risk_level,
            'overall_assessment': overall_assessment,
            'ai_detection': ai_results,
        }
    

if __name__ == "__main__":
    # Example usage
    analyzer = TextAuthenticityAnalyzer()
    print("Text authenticity analyzer initialized.")
    print("Components: Plagiarism Detector + AI Text Detector")