# campus-Me / src / ai_engine / detector.py
# Mithun-999
# Complete AI Academic Document Suite
# 342973b
"""
AI Detector - Analyze AI detection risks and provide transparency
"""
import logging
import re
from collections import Counter
from typing import Any, Dict, List, Tuple
logger = logging.getLogger(__name__)
class AIDetector:
    """Heuristically score text for traits often associated with AI writing.

    Six independent checks (structure, repetition, formality, contractions,
    transitions, sentence variety) each return a value in ``[0, 1]`` and are
    combined with equal weight. A separate "human elements" score is then
    subtracted as a discount. All checks are simple regex/word-count
    heuristics; the result is an indication, not a verdict.
    """

    # Weight applied to each of the six risk indicators (max total: 0.9).
    _INDICATOR_WEIGHT = 0.15
    # Fraction of the human-elements score subtracted from the weighted risk.
    _HUMAN_DISCOUNT = 0.1
    # Below this many words the contraction rate is too noisy to interpret.
    _MIN_WORDS_FOR_CONTRACTIONS = 100

    # Transition words/phrases counted by ``_check_transitions``.
    _TRANSITIONS = (
        "furthermore",
        "however",
        "therefore",
        "additionally",
        "moreover",
        "subsequently",
        "consequently",
        "as a result",
    )

    # Ordinary 1-2 letter English words that must NOT be treated as typos.
    # Lowercase "i" is deliberately absent: a standalone lowercase "i" is a
    # casual-typing trait and therefore still counts as a human signal.
    _COMMON_SHORT_WORDS = frozenset(
        {
            "a", "am", "an", "as", "at", "be", "by", "do", "go", "he", "if",
            "in", "is", "it", "me", "my", "no", "of", "oh", "ok", "on", "or",
            "so", "to", "up", "us", "we",
        }
    )

    # Patterns are compiled once at class-creation time instead of per call.
    _FORMAL_RE = re.compile(
        r"\b(Furthermore|Moreover|Subsequently|In conclusion)\b", re.IGNORECASE
    )
    _CONTRACTION_RE = re.compile(
        r"\b(don't|can't|won't|it's|that's|isn't|aren't|haven't|hasn't)\b",
        re.IGNORECASE,
    )
    _TRANSITION_RE = re.compile(
        r"\b(?:" + "|".join(_TRANSITIONS) + r")\b", re.IGNORECASE
    )
    _INFORMAL_RE = re.compile(
        r"\b(like|really|definitely|totally|basically)\b", re.IGNORECASE
    )
    # Split after sentence-ending punctuation followed by whitespace.
    _SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")
    # Stand-alone lowercase token of 1-2 letters. The lookarounds also reject
    # apostrophes (ASCII and U+2019) so that pieces of contractions such as
    # the "t" in "don't" are not reported as separate tokens.
    _SHORT_TOKEN_RE = re.compile(r"(?<![\w'\u2019])[a-z]{1,2}(?![\w'\u2019])")

    def __init__(self):
        """Initialize AI detector with its table of indicator patterns."""
        # Only "perfect_structure" is read by the checks in this class; the
        # remaining entries are kept unchanged as part of the public attribute.
        self.ai_indicators = {
            "perfect_structure": r"^(Introduction|Furthermore|Conclusion)$",
            "repeated_phrases": r"(?:It is important|significantly|Therefore|however)",
            "formal_language": r"(?:Furthermore|However|In conclusion|Subsequently)",
            "lack_of_contractions": r"(?<!n't)\s(?:is|are|have|has|do|does|will|would|could|should)\s",
            "generic_transitions": r"\b(Also|Additionally|Moreover|Furthermore)\b",
            "overly_perfect_grammar": r"^[A-Z][a-zA-Z\s.,;:]*[.!?]$",
        }

    def analyze_detection_risk(self, content: str) -> Dict[str, Any]:
        """
        Analyze content for AI detection risks.

        Args:
            content: Generated content to analyze

        Returns:
            Dict with keys ``risk_score`` (float in [0, 1]), ``risk_level``
            (label string), ``indicators`` (per-check scores) and
            ``recommendation`` (advice string).
        """
        # Order matters only for the key order of ``indicators``.
        weighted_checks = (
            ("structure", self._check_structure),
            ("repetition", self._check_repetition),
            ("formality", self._check_formality),
            ("contractions", self._check_contractions),
            ("transitions", self._check_transitions),
            ("variety", self._check_variety),
        )

        indicators: Dict[str, float] = {}
        risk_score = 0.0
        for name, check in weighted_checks:
            value = check(content)
            indicators[name] = value
            risk_score += value * self._INDICATOR_WEIGHT

        # Human-looking traits reduce the risk instead of adding to it.
        human_score = self._check_human_elements(content)
        indicators["human_elements"] = human_score
        risk_score -= human_score * self._HUMAN_DISCOUNT

        # Clamp once so score, level and recommendation all agree.
        risk_score = min(max(risk_score, 0.0), 1.0)

        return {
            "risk_score": risk_score,
            "risk_level": self._score_to_level(risk_score),
            "indicators": indicators,
            "recommendation": self._get_recommendation(risk_score),
        }

    def _check_structure(self, content: str) -> float:
        """Return the fraction of lines that are bare structural headings.

        A line counts when, after stripping, it matches the
        ``perfect_structure`` pattern from ``self.ai_indicators``
        (case-insensitively).
        """
        lines = content.split("\n")
        pattern = self.ai_indicators["perfect_structure"]
        structural_elements = sum(
            1 for line in lines if re.match(pattern, line.strip(), re.IGNORECASE)
        )
        # Normalize: more structure elements = higher risk
        return min(structural_elements / max(len(lines), 1), 1.0)

    def _check_repetition(self, content: str) -> float:
        """Return the share of distinct 3-word phrases used more than twice."""
        words = content.lower().split()
        # zip stops at the shortest slice, yielding every consecutive trigram.
        phrase_freq = Counter(zip(words, words[1:], words[2:]))
        repeated_phrases = sum(1 for freq in phrase_freq.values() if freq > 2)
        return min(repeated_phrases / max(len(phrase_freq), 1), 1.0)

    def _check_formality(self, content: str) -> float:
        """Return formal-marker density (markers per 100 words), capped at 1."""
        formal_markers = len(self._FORMAL_RE.findall(content))
        word_count = len(content.split())
        # Texts under 100 words are treated as if they had 100 words so a
        # single marker in a short text does not saturate the score.
        formality_ratio = formal_markers / max(word_count / 100, 1)
        return min(formality_ratio, 1.0)

    def _check_contractions(self, content: str) -> float:
        """Score the lack of contractions (treated here as an AI trait).

        Returns 0.3 for texts under 100 words (too short to judge), 0.8 when
        fewer than 2 contractions per 100 words are found, 0.3 when more than
        20 per 100 words are found, and 0.2 otherwise.
        """
        word_count = len(content.split())
        if word_count < self._MIN_WORDS_FOR_CONTRACTIONS:
            return 0.3  # Short text is hard to judge

        # Text pasted from word processors uses the typographic apostrophe
        # (U+2019); normalise it so "don’t" is recognised like "don't".
        normalized = content.replace("\u2019", "'")
        contractions = len(self._CONTRACTION_RE.findall(normalized))
        contraction_rate = contractions / word_count * 100

        if contraction_rate < 2:
            return 0.8  # High risk if (almost) no contractions
        if contraction_rate > 20:
            return 0.3  # Unusually many contractions
        return 0.2  # Normal range

    def _check_transitions(self, content: str) -> float:
        """Score overuse of transition phrases.

        Returns 0.8 above 3 transitions per 100 words, 0.2 below 0.5, and
        0.4 in between. Texts under 100 words are normalised as 100 words.
        """
        transition_count = len(self._TRANSITION_RE.findall(content))
        word_count = len(content.split())
        transition_ratio = transition_count / max(word_count / 100, 1)

        if transition_ratio > 3:
            return 0.8  # High risk
        if transition_ratio < 0.5:
            return 0.2  # Low risk
        return 0.4  # Medium risk

    def _check_variety(self, content: str) -> float:
        """Score sentence-length variety via the standard deviation of lengths.

        Returns 0.2 when fewer than three sentences are present (cannot
        judge), 0.7 for a standard deviation below 2 words, 0.2 above 5
        words, and 0.4 otherwise.
        """
        # Strip first and drop empties: otherwise trailing whitespace yields
        # a zero-length "sentence" that artificially inflates the variance.
        sentences = [s for s in self._SENTENCE_SPLIT_RE.split(content.strip()) if s]
        if len(sentences) < 3:
            return 0.2  # Can't judge

        sentence_lengths = [len(s.split()) for s in sentences]
        avg_length = sum(sentence_lengths) / len(sentence_lengths)
        variance = sum(
            (length - avg_length) ** 2 for length in sentence_lengths
        ) / len(sentence_lengths)
        std_dev = variance ** 0.5

        # High std dev = more variety = more human
        if std_dev < 2:
            return 0.7  # Low variety = higher AI risk
        if std_dev > 5:
            return 0.2  # High variety = lower AI risk
        return 0.4  # Medium

    def _check_human_elements(self, content: str) -> float:
        """Score traits that suggest a human author (higher = more human).

        Adds 0.2 each for: unusual very short lowercase tokens exceeding 2%
        of the word count, more than five informal words, and more than one
        question mark per 100 words. Adds 0.1 each for any exclamation mark
        and for any of ``; : — –``. The total is capped at 1.0.
        """
        score = 0.0
        word_count = len(content.split())

        # Very short lowercase tokens that are not ordinary words ("u", "gd")
        # hint at typos or chat-style writing. Common words such as "is" or
        # "of" are excluded, otherwise this check fires on any English text.
        odd_short_tokens = sum(
            1
            for token in self._SHORT_TOKEN_RE.findall(content)
            if token not in self._COMMON_SHORT_WORDS
        )
        if odd_short_tokens > word_count * 0.02:
            score += 0.2

        # Informal vocabulary.
        if len(self._INFORMAL_RE.findall(content)) > 5:
            score += 0.2

        # Questions: more than one per 100 words.
        if content.count("?") > word_count / 100:
            score += 0.2

        # Any exclamation at all.
        if "!" in content:
            score += 0.1

        # Varied punctuation.
        if re.search(r"[;:—–]", content):
            score += 0.1

        return min(score, 1.0)

    def _score_to_level(self, score: float) -> str:
        """Map a risk score to a label using 0.2-wide bands."""
        # Upper bounds are exclusive; anything >= 0.8 is "Very High".
        bands = (
            (0.2, "Very Low"),
            (0.4, "Low"),
            (0.6, "Medium"),
            (0.8, "High"),
        )
        for upper_bound, label in bands:
            if score < upper_bound:
                return label
        return "Very High"

    def _get_recommendation(self, score: float) -> str:
        """Return the advice string for a risk score (cut-offs 0.3/0.5/0.7)."""
        if score < 0.3:
            return "Content appears human-like. Low detection risk."
        if score < 0.5:
            return "Content has some AI characteristics but is reasonably human-like."
        if score < 0.7:
            return "Content shows notable AI traits. Recommend humanization."
        return "Content shows high AI characteristics. Strong humanization needed."

    def get_detection_report(self, content: str) -> str:
        """
        Generate detailed detection report.

        Args:
            content: Content to analyze

        Returns:
            Formatted multi-line report; it starts and ends with a newline.
        """
        analysis = self.analyze_detection_risk(content)
        indicators = analysis["indicators"]
        separator = "=" * 50

        # Built from a list so the output does not depend on how a
        # triple-quoted literal happens to be indented inside this method.
        lines = [
            "",
            "AI DETECTION ANALYSIS REPORT",
            separator,
            f"Overall Risk Score: {analysis['risk_score']:.1%}",
            f"Risk Level: {analysis['risk_level']}",
            "DETAILED INDICATORS:",
            f"- Structure Formality: {indicators['structure']:.1%}",
            f"- Phrase Repetition: {indicators['repetition']:.1%}",
            f"- Excessive Formality: {indicators['formality']:.1%}",
            f"- Lack of Contractions: {indicators['contractions']:.1%}",
            f"- Transition Usage: {indicators['transitions']:.1%}",
            f"- Sentence Variety: {indicators['variety']:.1%}",
            f"- Human Elements: {indicators['human_elements']:.1%}",
            "RECOMMENDATION:",
            f"{analysis['recommendation']}",
            "IMPORTANT:",
            "This analysis is for educational purposes only. AI detection tools",
            "are not perfect and can produce false positives/negatives. Using",
            "this tool responsibly and with proper disclosure is essential.",
            separator,
            "",
        ]
        return "\n".join(lines)