""" AI Detector - Analyze AI detection risks and provide transparency """ import re from typing import Dict, List, Tuple import logging logger = logging.getLogger(__name__) class AIDetector: """ Analyze generated content for AI detection risks and detection patterns. """ def __init__(self): """Initialize AI detector.""" self.ai_indicators = { "perfect_structure": r"^(Introduction|Furthermore|Conclusion)$", "repeated_phrases": r"(?:It is important|significantly|Therefore|however)", "formal_language": r"(?:Furthermore|However|In conclusion|Subsequently)", "lack_of_contractions": r"(? Dict[str, any]: """ Analyze content for AI detection risks. Args: content: Generated content to analyze Returns: Dict with detection risk scores and indicators """ risk_score = 0.0 indicators = {} # Check for perfect structure structure_risk = self._check_structure(content) indicators["structure"] = structure_risk risk_score += structure_risk * 0.15 # Check for repeated phrases repeat_risk = self._check_repetition(content) indicators["repetition"] = repeat_risk risk_score += repeat_risk * 0.15 # Check formality level formality_risk = self._check_formality(content) indicators["formality"] = formality_risk risk_score += formality_risk * 0.15 # Check for contractions contraction_risk = self._check_contractions(content) indicators["contractions"] = contraction_risk risk_score += contraction_risk * 0.15 # Check for transitions transition_risk = self._check_transitions(content) indicators["transitions"] = transition_risk risk_score += transition_risk * 0.15 # Check sentence variety variety_risk = self._check_variety(content) indicators["variety"] = variety_risk risk_score += variety_risk * 0.15 # Check for human elements human_score = self._check_human_elements(content) indicators["human_elements"] = human_score risk_score = max(0, risk_score - human_score * 0.1) return { "risk_score": min(risk_score, 1.0), # Normalize to 0-1 "risk_level": self._score_to_level(risk_score), "indicators": indicators, "recommendation": self._get_recommendation(risk_score), } def _check_structure(self, content: str) -> float: """Check if content has too-perfect structure.""" lines = content.split("\n") structural_elements = 0 for line in lines: if re.match(self.ai_indicators["perfect_structure"], line.strip(), re.IGNORECASE): structural_elements += 1 # Normalize: more structure elements = higher risk return min(structural_elements / max(len(lines), 1), 1.0) def _check_repetition(self, content: str) -> float: """Check for repeated phrases.""" words = content.lower().split() phrase_freq = {} # Check 3-word phrases for i in range(len(words) - 2): phrase = " ".join(words[i : i + 3]) phrase_freq[phrase] = phrase_freq.get(phrase, 0) + 1 # Calculate repetition score repeated_phrases = sum(1 for freq in phrase_freq.values() if freq > 2) return min(repeated_phrases / max(len(phrase_freq), 1), 1.0) def _check_formality(self, content: str) -> float: """Check for overly formal language.""" formal_markers = len(re.findall(r"\b(Furthermore|Moreover|Subsequently|In conclusion)\b", content, re.IGNORECASE)) word_count = len(content.split()) # High formality can indicate AI generation formality_ratio = formal_markers / max(word_count / 100, 1) return min(formality_ratio, 1.0) def _check_contractions(self, content: str) -> float: """Check for lack of contractions (AI trait).""" contractions = len(re.findall(r"\b(don't|can't|won't|it's|that's|isn't|aren't|haven't|hasn't)\b", content, re.IGNORECASE)) # More contractions = more human # Lack of contractions = more AI-like if len(content.split()) < 100: return 0.3 # Short text is hard to judge contraction_rate = contractions / len(content.split()) * 100 # Humans typically use contractions at ~5-15% rate if contraction_rate < 2: return 0.8 # High risk if no contractions elif contraction_rate > 20: return 0.3 # Low risk if too many contractions else: return 0.2 # Normal range def _check_transitions(self, content: str) -> float: """Check for overuse of transition phrases.""" transitions = [ "furthermore", "however", "therefore", "additionally", "moreover", "subsequently", "consequently", "as a result", ] transition_count = 0 for transition in transitions: transition_count += len(re.findall(rf"\b{transition}\b", content, re.IGNORECASE)) # Normalize: more transitions than reasonable = higher risk word_count = len(content.split()) transition_ratio = transition_count / max(word_count / 100, 1) # Humans typically use 1-2 transitions per 100 words if transition_ratio > 3: return 0.8 # High risk elif transition_ratio < 0.5: return 0.2 # Low risk else: return 0.4 # Medium risk def _check_variety(self, content: str) -> float: """Check sentence variety.""" sentences = re.split(r"(?<=[.!?])\s+", content) if len(sentences) < 3: return 0.2 # Can't judge sentence_lengths = [len(s.split()) for s in sentences] avg_length = sum(sentence_lengths) / len(sentence_lengths) # Calculate variance in sentence length variance = sum((length - avg_length) ** 2 for length in sentence_lengths) / len(sentence_lengths) std_dev = variance ** 0.5 # High std dev = more variety = more human if std_dev < 2: return 0.7 # Low variety = higher AI risk elif std_dev > 5: return 0.2 # High variety = lower AI risk else: return 0.4 # Medium def _check_human_elements(self, content: str) -> float: """Check for human elements (typos, informal language, etc.).""" score = 0 # Check for typos (deliberate misspellings) typos = len(re.findall(r"\b[a-z]{1,2}\b", content)) # Very short words often indicate typos if typos > len(content.split()) * 0.02: score += 0.2 # Check for informal language informal = len(re.findall(r"\b(like|really|definitely|totally|basically)\b", content, re.IGNORECASE)) if informal > 5: score += 0.2 # Check for questions questions = len(re.findall(r"\?", content)) if questions > len(content.split()) / 100: score += 0.2 # Check for exclamations exclamations = len(re.findall(r"!", content)) if exclamations > 0: score += 0.1 # Check for varied punctuation if re.search(r"[;:—–]", content): score += 0.1 return min(score, 1.0) def _score_to_level(self, score: float) -> str: """Convert score to risk level.""" if score < 0.2: return "Very Low" elif score < 0.4: return "Low" elif score < 0.6: return "Medium" elif score < 0.8: return "High" else: return "Very High" def _get_recommendation(self, score: float) -> str: """Get recommendation based on risk score.""" if score < 0.3: return "Content appears human-like. Low detection risk." elif score < 0.5: return "Content has some AI characteristics but is reasonably human-like." elif score < 0.7: return "Content shows notable AI traits. Recommend humanization." else: return "Content shows high AI characteristics. Strong humanization needed." def get_detection_report(self, content: str) -> str: """ Generate detailed detection report. Args: content: Content to analyze Returns: Formatted report """ analysis = self.analyze_detection_risk(content) report = f""" AI DETECTION ANALYSIS REPORT {'=' * 50} Overall Risk Score: {analysis['risk_score']:.1%} Risk Level: {analysis['risk_level']} DETAILED INDICATORS: - Structure Formality: {analysis['indicators']['structure']:.1%} - Phrase Repetition: {analysis['indicators']['repetition']:.1%} - Excessive Formality: {analysis['indicators']['formality']:.1%} - Lack of Contractions: {analysis['indicators']['contractions']:.1%} - Transition Usage: {analysis['indicators']['transitions']:.1%} - Sentence Variety: {analysis['indicators']['variety']:.1%} - Human Elements: {analysis['indicators']['human_elements']:.1%} RECOMMENDATION: {analysis['recommendation']} IMPORTANT: This analysis is for educational purposes only. AI detection tools are not perfect and can produce false positives/negatives. Using this tool responsibly and with proper disclosure is essential. {'=' * 50} """ return report