Spaces:

ranamhamoud
/

Authenticity

Sleeping

App Files Files Community

Authenticity / text_analyzer.py

ranamhamoud

Upload folder using huggingface_hub

01b5c48 unverified 2 months ago

raw

history blame

4.79 kB

	import re
	import requests
	from typing import Dict, List, Tuple, Optional
	import torch
	from transformers import (
	AutoTokenizer,
	AutoModelForSequenceClassification,
	RobertaTokenizer,
	RobertaForSequenceClassification
	)
	import numpy as np
	from collections import Counter
	import warnings
	warnings.filterwarnings("ignore")

	try:
	    # Optional backend: only the import itself is guarded, so nothing
	    # else can be mistaken for a missing-module failure.
	    from plagiarism_detection import ai_plagiarism_detection
	except ImportError:
	    # Backend module is absent; record that and tell the user once at import.
	    DESKLIB_AVAILABLE = False
	    print("Warning: plagiarism_detection module not found. Using fallback AI detection.")
	else:
	    DESKLIB_AVAILABLE = True




	class AITextDetector:
	    """Wrap the optional ``ai_plagiarism_detection`` backend in a dict-based API.

	    When the backend module could not be imported (``DESKLIB_AVAILABLE`` is
	    false) the detector stays usable but always reports a neutral result.
	    """

	    def __init__(self, device: str = None, threshold: float = 0.78):
	        """Record the decision threshold and whether the backend is usable.

	        Args:
	            device: Accepted for interface compatibility; this class does not
	                read it.
	            threshold: Probability cut-off forwarded to the backend and echoed
	                in the indicator / interpretation text.
	        """
	        self.threshold = threshold

	        if not DESKLIB_AVAILABLE:
	            print("Warning: plagiarism_detection module not found. AI detection will not be available.")
	            print("Ensure plagiarism_detection.py is in the same directory.")
	            self.available = False
	        else:
	            print(f"Using Desklib AI text detector (threshold: {self.threshold})")
	            self.available = True

	    def detect_ai_text(self, text: str) -> Dict:
	        """Score ``text`` and return the detection result as a plain dict.

	        Returns:
	            A dict with the keys ``'ai_generated'`` (bool), ``'confidence'``
	            (float), ``'indicators'`` (list of str), ``'interpretation'`` (str)
	            and ``'model_used'`` (str). If the backend is unavailable or raises,
	            a neutral result (``ai_generated=False``, ``confidence=0.5``) is
	            returned instead of propagating the error.
	        """
	        if not self.available:
	            # Return neutral result if Desklib not available
	            return {
	                'ai_generated': False,
	                'confidence': 0.5,
	                'indicators': [],
	                'interpretation': "AI detection not available. Install plagiarism_detection module.",
	                'model_used': 'N/A (module not found)'
	            }

	        # Use Desklib AI detector
	        try:
	            probability, ai_detected = ai_plagiarism_detection(
	                text,
	                threshold=self.threshold,
	                show_results=False
	            )

	            # Normalise the backend's values to builtin types once, so every
	            # code path returns the same (JSON-friendly) types.
	            # NOTE(review): assumes the backend's second value is a truthy
	            # flag (bool / 0-1 label) -- confirm against plagiarism_detection.
	            probability = float(probability)

	            return {
	                'ai_generated': bool(ai_detected),
	                'confidence': probability,
	                'indicators': self._identify_ai_indicators(probability),
	                'interpretation': self._interpret_ai_detection(probability),
	                'model_used': 'Desklib AI Detector v1.01'
	            }
	        except Exception as e:
	            # Deliberate best-effort boundary: report the failure in the
	            # result instead of crashing the caller.
	            print(f"Error in AI detection: {e}")
	            return {
	                'ai_generated': False,
	                'confidence': 0.5,
	                'indicators': [],
	                'interpretation': f"AI detection error: {str(e)}",
	                'model_used': 'Error'
	            }

	    def _identify_ai_indicators(self, probability: float) -> List[str]:
	        """Return at most one human-readable indicator for ``probability``."""
	        indicators = []

	        if probability > 0.9:
	            indicators.append("Very high AI probability (>90%)")
	        elif probability > 0.7:
	            indicators.append("High AI probability (70-90%)")
	        elif probability > self.threshold:
	            # Only reachable when the threshold is below 0.7; with a
	            # threshold of 0.7 or more (including the default 0.78) any
	            # probability above it is already caught by the bands above.
	            indicators.append(f"AI detected above threshold ({self.threshold*100:.0f}%)")

	        return indicators

	    def _interpret_ai_detection(self, score: float) -> str:
	        """Format the score and the configured threshold as a short report."""
	        interpretation = "AI-Generated Text Detection:\n\n"
	        interpretation += f"- AI Probability Score: {score*100:.1f}%\n"
	        interpretation += f"- Detection Threshold: {self.threshold*100:.0f}%\n"

	        return interpretation


	class TextAuthenticityAnalyzer:
	    """Turn the AI detector's confidence into an authenticity score and risk band."""

	    def __init__(self, device: str = None, ai_threshold: float = 0.78):
	        """Create the underlying ``AITextDetector`` with the given settings."""
	        self.ai_detector = AITextDetector(device=device, threshold=ai_threshold)

	    def analyze(self, text: str) -> Dict:
	        """Run AI detection on ``text`` and summarise the outcome.

	        Returns:
	            A dict with ``'authenticity_score'`` (1.0 minus the detector's
	            confidence), ``'risk_level'``, ``'overall_assessment'`` and the raw
	            detector output under ``'ai_detection'``.
	        """
	        detection = self.ai_detector.detect_ai_text(text)

	        # The detector's confidence is treated directly as the penalty.
	        score = 1.0 - detection['confidence']

	        # Ordered (exclusive upper bound, level, verdict) bands; first match wins.
	        risk_bands = (
	            (0.3, "high", "HIGH RISK: Strong AI-generated text indicators"),
	            (0.5, "moderate", "MODERATE RISK: Likely AI-generated"),
	            (0.7, "low", "LOW RISK: Some AI characteristics"),
	        )

	        # Anything that falls through every band is considered authentic.
	        level = "minimal"
	        verdict = "AUTHENTIC: Text appears human-written"
	        for upper_bound, band_level, band_verdict in risk_bands:
	            if score < upper_bound:
	                level, verdict = band_level, band_verdict
	                break

	        return {
	            'authenticity_score': float(score),
	            'risk_level': level,
	            'overall_assessment': verdict,
	            'ai_detection': detection,
	        }


	if __name__ == "__main__":
	    # Smoke check when run as a script: build the analyzer, then report.
	    analyzer = TextAuthenticityAnalyzer()
	    print(
	        "Text authenticity analyzer initialized.",
	        "Components: Plagiarism Detector + AI Text Detector",
	        sep="\n",
	    )