Spaces:

Mithun-999
/

campus-Me

Paused

App Files Files Community

campus-Me / src /ai_engine /detector.py

Mithun-999

Complete AI Academic Document Suite

342973b 2 months ago

raw

history blame contribute delete

9.9 kB

	"""
	AI Detector - Analyze AI detection risks and provide transparency
	"""

	import logging
	import re
	from collections import Counter
	from typing import Any, Dict, List, Tuple

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)


	class AIDetector:
	    """
	    Analyze generated content for AI detection risks and detection patterns.

	    Six heuristic indicators (structure, repetition, formality, contractions,
	    transitions, sentence variety) each yield a value in [0, 1]. They are
	    combined with equal weights, and a "human elements" score is subtracted
	    as a discount. All checks are plain regex / word-count heuristics.
	    """

	    # Weight given to each of the six risk indicators (6 * 0.15 = 0.9 max).
	    _INDICATOR_WEIGHT = 0.15
	    # Factor by which the human-elements score reduces the combined risk.
	    _HUMAN_DISCOUNT = 0.1

	    # Patterns are compiled once; alternatives are separated with a plain
	    # "|" (an escaped "\|" would only match a literal pipe character).
	    _FORMAL_RE = re.compile(
	        r"\b(?:Furthermore|Moreover|Subsequently|In conclusion)\b",
	        re.IGNORECASE,
	    )
	    # Accepts both the ASCII apostrophe and the typographic one (U+2019).
	    _CONTRACTION_RE = re.compile(
	        r"\b(?:(?:do|ca|wo|is|are|have|has)n['\u2019]t|(?:it|that)['\u2019]s)\b",
	        re.IGNORECASE,
	    )
	    _TRANSITION_RE = re.compile(
	        r"\b(?:furthermore|however|therefore|additionally|moreover"
	        r"|subsequently|consequently|as a result)\b",
	        re.IGNORECASE,
	    )
	    _INFORMAL_RE = re.compile(
	        r"\b(?:like|really|definitely|totally|basically)\b",
	        re.IGNORECASE,
	    )
	    _SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")

	    def __init__(self):
	        """Initialize AI detector."""
	        # Only "perfect_structure" is read by the methods of this class; the
	        # remaining entries are kept so the public attribute stays unchanged.
	        self.ai_indicators = {
	            "perfect_structure": r"^(Introduction|Furthermore|Conclusion)$",
	            "repeated_phrases": r"(?:It is important|significantly|Therefore|however)",
	            "formal_language": r"(?:Furthermore|However|In conclusion|Subsequently)",
	            "lack_of_contractions": r"(?<!n't)\s(?:is|are|have|has|do|does|will|would|could|should)\s",
	            "generic_transitions": r"\b(Also|Additionally|Moreover|Furthermore)\b",
	            "overly_perfect_grammar": r"^[A-Z][a-zA-Z\s.,;:]*[.!?]$",
	        }

	    def analyze_detection_risk(self, content: str) -> Dict[str, Any]:
	        """
	        Analyze content for AI detection risks.

	        Args:
	            content: Generated content to analyze

	        Returns:
	            Dict with keys "risk_score" (float in [0, 1]), "risk_level"
	            (label from _score_to_level), "indicators" (per-check values)
	            and "recommendation" (text from _get_recommendation).
	        """
	        checks = (
	            ("structure", self._check_structure),
	            ("repetition", self._check_repetition),
	            ("formality", self._check_formality),
	            ("contractions", self._check_contractions),
	            ("transitions", self._check_transitions),
	            ("variety", self._check_variety),
	        )
	        indicators = {name: check(content) for name, check in checks}
	        risk_score = sum(
	            value * self._INDICATOR_WEIGHT for value in indicators.values()
	        )

	        # Human elements lower the risk instead of adding to it.
	        human_score = self._check_human_elements(content)
	        indicators["human_elements"] = human_score
	        risk_score -= human_score * self._HUMAN_DISCOUNT

	        # Clamp once so score, level and recommendation all agree.
	        risk_score = min(max(0.0, risk_score), 1.0)

	        return {
	            "risk_score": risk_score,
	            "risk_level": self._score_to_level(risk_score),
	            "indicators": indicators,
	            "recommendation": self._get_recommendation(risk_score),
	        }

	    def _check_structure(self, content: str) -> float:
	        """Return the fraction of lines that are a bare structural heading."""
	        lines = content.split("\n")
	        pattern = self.ai_indicators["perfect_structure"]
	        structural_elements = sum(
	            1 for line in lines if re.match(pattern, line.strip(), re.IGNORECASE)
	        )

	        # Normalize: more structure elements = higher risk
	        return min(structural_elements / max(len(lines), 1), 1.0)

	    def _check_repetition(self, content: str) -> float:
	        """Return the share of distinct 3-word phrases occurring more than twice."""
	        words = content.lower().split()
	        # zip stops at the shortest slice, so this yields every 3-word window.
	        phrase_freq = Counter(zip(words, words[1:], words[2:]))

	        repeated_phrases = sum(1 for freq in phrase_freq.values() if freq > 2)
	        return min(repeated_phrases / max(len(phrase_freq), 1), 1.0)

	    def _check_formality(self, content: str) -> float:
	        """Return formal-marker density per 100 words, capped at 1.0."""
	        formal_markers = len(self._FORMAL_RE.findall(content))
	        word_count = len(content.split())

	        # Texts shorter than 100 words are treated as one 100-word unit.
	        formality_ratio = formal_markers / max(word_count / 100, 1)
	        return min(formality_ratio, 1.0)

	    def _check_contractions(self, content: str) -> float:
	        """Check for lack of contractions (AI trait)."""
	        word_count = len(content.split())
	        if word_count < 100:
	            return 0.3  # Short text is hard to judge

	        contractions = len(self._CONTRACTION_RE.findall(content))
	        # Contractions per 100 words.
	        contraction_rate = contractions / word_count * 100

	        if contraction_rate < 2:
	            return 0.8  # High risk if (almost) no contractions
	        if contraction_rate > 20:
	            return 0.3  # Unusually many contractions
	        return 0.2  # Normal range

	    def _check_transitions(self, content: str) -> float:
	        """Check for overuse of transition phrases."""
	        transition_count = len(self._TRANSITION_RE.findall(content))

	        # Transitions per 100 words; short texts count as one 100-word unit.
	        word_count = len(content.split())
	        transition_ratio = transition_count / max(word_count / 100, 1)

	        if transition_ratio > 3:
	            return 0.8  # High risk
	        if transition_ratio < 0.5:
	            return 0.2  # Low risk
	        return 0.4  # Medium risk

	    def _check_variety(self, content: str) -> float:
	        """Check sentence-length variety (low spread = higher risk)."""
	        # Drop empty pieces (e.g. after trailing whitespace) so they are not
	        # counted as zero-word sentences.
	        sentences = [
	            piece for piece in self._SENTENCE_SPLIT_RE.split(content) if piece.strip()
	        ]

	        if len(sentences) < 3:
	            return 0.2  # Can't judge

	        sentence_lengths = [len(sentence.split()) for sentence in sentences]
	        avg_length = sum(sentence_lengths) / len(sentence_lengths)

	        # Population standard deviation of the sentence lengths (in words).
	        variance = sum(
	            (length - avg_length) ** 2 for length in sentence_lengths
	        ) / len(sentence_lengths)
	        std_dev = variance ** 0.5

	        if std_dev < 2:
	            return 0.7  # Low variety = higher AI risk
	        if std_dev > 5:
	            return 0.2  # High variety = lower AI risk
	        return 0.4  # Medium

	    def _check_human_elements(self, content: str) -> float:
	        """Check for human elements (short words, informal language, etc.)."""
	        word_count = len(content.split())
	        score = 0.0

	        # Lowercase words of one or two letters, used as a "typo" signal.
	        # NOTE(review): this also matches ordinary words such as "is" or
	        # "to", so most prose passes this check; the heuristic is kept as is.
	        typos = len(re.findall(r"\b[a-z]{1,2}\b", content))
	        if typos > word_count * 0.02:
	            score += 0.2

	        # Informal vocabulary
	        informal = len(self._INFORMAL_RE.findall(content))
	        if informal > 5:
	            score += 0.2

	        # More than one question mark per 100 words
	        questions = content.count("?")
	        if questions > word_count / 100:
	            score += 0.2

	        # Any exclamation mark
	        if "!" in content:
	            score += 0.1

	        # Varied punctuation (semicolon, colon, em/en dash)
	        if re.search(r"[;:—–]", content):
	            score += 0.1

	        return min(score, 1.0)

	    def _score_to_level(self, score: float) -> str:
	        """Convert score to risk level."""
	        if score < 0.2:
	            return "Very Low"
	        if score < 0.4:
	            return "Low"
	        if score < 0.6:
	            return "Medium"
	        if score < 0.8:
	            return "High"
	        return "Very High"

	    def _get_recommendation(self, score: float) -> str:
	        """Get recommendation based on risk score."""
	        if score < 0.3:
	            return "Content appears human-like. Low detection risk."
	        if score < 0.5:
	            return "Content has some AI characteristics but is reasonably human-like."
	        if score < 0.7:
	            return "Content shows notable AI traits. Recommend humanization."
	        return "Content shows high AI characteristics. Strong humanization needed."

	    def get_detection_report(self, content: str) -> str:
	        """
	        Generate detailed detection report.

	        Args:
	            content: Content to analyze

	        Returns:
	            Formatted multi-line report built from analyze_detection_risk.
	        """
	        analysis = self.analyze_detection_risk(content)

	        report = f"""
	AI DETECTION ANALYSIS REPORT
	{'=' * 50}

	Overall Risk Score: {analysis['risk_score']:.1%}
	Risk Level: {analysis['risk_level']}

	DETAILED INDICATORS:
	- Structure Formality: {analysis['indicators']['structure']:.1%}
	- Phrase Repetition: {analysis['indicators']['repetition']:.1%}
	- Excessive Formality: {analysis['indicators']['formality']:.1%}
	- Lack of Contractions: {analysis['indicators']['contractions']:.1%}
	- Transition Usage: {analysis['indicators']['transitions']:.1%}
	- Sentence Variety: {analysis['indicators']['variety']:.1%}
	- Human Elements: {analysis['indicators']['human_elements']:.1%}

	RECOMMENDATION:
	{analysis['recommendation']}

	IMPORTANT:
	This analysis is for educational purposes only. AI detection tools
	are not perfect and can produce false positives/negatives. Using
	this tool responsibly and with proper disclosure is essential.
	{'=' * 50}
	"""

	        return report