"""
AI Detector - Analyze AI detection risks and provide transparency
"""

import re
from typing import Dict, List, Tuple
import logging

logger = logging.getLogger(__name__)


class AIDetector:
    """
    Analyze generated content for AI detection risks and detection patterns.
    """

    def __init__(self):
        """Initialize AI detector."""
        self.ai_indicators = {
            "perfect_structure": r"^(Introduction|Furthermore|Conclusion)$",
            "repeated_phrases": r"(?:It is important|significantly|Therefore|however)",
            "formal_language": r"(?:Furthermore|However|In conclusion|Subsequently)",
            "lack_of_contractions": r"(?<!n't)\s(?:is|are|have|has|do|does|will|would|could|should)\s",
            "generic_transitions": r"\b(Also|Additionally|Moreover|Furthermore)\b",
            "overly_perfect_grammar": r"^[A-Z][a-zA-Z\s.,;:]*[.!?]$",
        }

    def analyze_detection_risk(self, content: str) -> Dict[str, any]:
        """
        Analyze content for AI detection risks.

        Args:
            content: Generated content to analyze

        Returns:
            Dict with detection risk scores and indicators
        """
        risk_score = 0.0
        indicators = {}

        # Check for perfect structure
        structure_risk = self._check_structure(content)
        indicators["structure"] = structure_risk
        risk_score += structure_risk * 0.15

        # Check for repeated phrases
        repeat_risk = self._check_repetition(content)
        indicators["repetition"] = repeat_risk
        risk_score += repeat_risk * 0.15

        # Check formality level
        formality_risk = self._check_formality(content)
        indicators["formality"] = formality_risk
        risk_score += formality_risk * 0.15

        # Check for contractions
        contraction_risk = self._check_contractions(content)
        indicators["contractions"] = contraction_risk
        risk_score += contraction_risk * 0.15

        # Check for transitions
        transition_risk = self._check_transitions(content)
        indicators["transitions"] = transition_risk
        risk_score += transition_risk * 0.15

        # Check sentence variety
        variety_risk = self._check_variety(content)
        indicators["variety"] = variety_risk
        risk_score += variety_risk * 0.15

        # Check for human elements
        human_score = self._check_human_elements(content)
        indicators["human_elements"] = human_score
        risk_score = max(0, risk_score - human_score * 0.1)

        return {
            "risk_score": min(risk_score, 1.0),  # Normalize to 0-1
            "risk_level": self._score_to_level(risk_score),
            "indicators": indicators,
            "recommendation": self._get_recommendation(risk_score),
        }

    def _check_structure(self, content: str) -> float:
        """Check if content has too-perfect structure."""
        lines = content.split("\n")
        structural_elements = 0

        for line in lines:
            if re.match(self.ai_indicators["perfect_structure"], line.strip(), re.IGNORECASE):
                structural_elements += 1

        # Normalize: more structure elements = higher risk
        return min(structural_elements / max(len(lines), 1), 1.0)

    def _check_repetition(self, content: str) -> float:
        """Check for repeated phrases."""
        words = content.lower().split()
        phrase_freq = {}

        # Check 3-word phrases
        for i in range(len(words) - 2):
            phrase = " ".join(words[i : i + 3])
            phrase_freq[phrase] = phrase_freq.get(phrase, 0) + 1

        # Calculate repetition score
        repeated_phrases = sum(1 for freq in phrase_freq.values() if freq > 2)
        return min(repeated_phrases / max(len(phrase_freq), 1), 1.0)

    def _check_formality(self, content: str) -> float:
        """Check for overly formal language."""
        formal_markers = len(re.findall(r"\b(Furthermore|Moreover|Subsequently|In conclusion)\b", content, re.IGNORECASE))
        word_count = len(content.split())

        # High formality can indicate AI generation
        formality_ratio = formal_markers / max(word_count / 100, 1)
        return min(formality_ratio, 1.0)

    def _check_contractions(self, content: str) -> float:
        """Check for lack of contractions (AI trait)."""
        contractions = len(re.findall(r"\b(don't|can't|won't|it's|that's|isn't|aren't|haven't|hasn't)\b", content, re.IGNORECASE))

        # More contractions = more human
        # Lack of contractions = more AI-like
        if len(content.split()) < 100:
            return 0.3  # Short text is hard to judge

        contraction_rate = contractions / len(content.split()) * 100
        # Humans typically use contractions at ~5-15% rate
        if contraction_rate < 2:
            return 0.8  # High risk if no contractions
        elif contraction_rate > 20:
            return 0.3  # Low risk if too many contractions
        else:
            return 0.2  # Normal range

    def _check_transitions(self, content: str) -> float:
        """Check for overuse of transition phrases."""
        transitions = [
            "furthermore",
            "however",
            "therefore",
            "additionally",
            "moreover",
            "subsequently",
            "consequently",
            "as a result",
        ]

        transition_count = 0
        for transition in transitions:
            transition_count += len(re.findall(rf"\b{transition}\b", content, re.IGNORECASE))

        # Normalize: more transitions than reasonable = higher risk
        word_count = len(content.split())
        transition_ratio = transition_count / max(word_count / 100, 1)

        # Humans typically use 1-2 transitions per 100 words
        if transition_ratio > 3:
            return 0.8  # High risk
        elif transition_ratio < 0.5:
            return 0.2  # Low risk
        else:
            return 0.4  # Medium risk

    def _check_variety(self, content: str) -> float:
        """Check sentence variety."""
        sentences = re.split(r"(?<=[.!?])\s+", content)

        if len(sentences) < 3:
            return 0.2  # Can't judge

        sentence_lengths = [len(s.split()) for s in sentences]
        avg_length = sum(sentence_lengths) / len(sentence_lengths)

        # Calculate variance in sentence length
        variance = sum((length - avg_length) ** 2 for length in sentence_lengths) / len(sentence_lengths)
        std_dev = variance ** 0.5

        # High std dev = more variety = more human
        if std_dev < 2:
            return 0.7  # Low variety = higher AI risk
        elif std_dev > 5:
            return 0.2  # High variety = lower AI risk
        else:
            return 0.4  # Medium

    def _check_human_elements(self, content: str) -> float:
        """Check for human elements (typos, informal language, etc.)."""
        score = 0

        # Check for typos (deliberate misspellings)
        typos = len(re.findall(r"\b[a-z]{1,2}\b", content))  # Very short words often indicate typos
        if typos > len(content.split()) * 0.02:
            score += 0.2

        # Check for informal language
        informal = len(re.findall(r"\b(like|really|definitely|totally|basically)\b", content, re.IGNORECASE))
        if informal > 5:
            score += 0.2

        # Check for questions
        questions = len(re.findall(r"\?", content))
        if questions > len(content.split()) / 100:
            score += 0.2

        # Check for exclamations
        exclamations = len(re.findall(r"!", content))
        if exclamations > 0:
            score += 0.1

        # Check for varied punctuation
        if re.search(r"[;:—–]", content):
            score += 0.1

        return min(score, 1.0)

    def _score_to_level(self, score: float) -> str:
        """Convert score to risk level."""
        if score < 0.2:
            return "Very Low"
        elif score < 0.4:
            return "Low"
        elif score < 0.6:
            return "Medium"
        elif score < 0.8:
            return "High"
        else:
            return "Very High"

    def _get_recommendation(self, score: float) -> str:
        """Get recommendation based on risk score."""
        if score < 0.3:
            return "Content appears human-like. Low detection risk."
        elif score < 0.5:
            return "Content has some AI characteristics but is reasonably human-like."
        elif score < 0.7:
            return "Content shows notable AI traits. Recommend humanization."
        else:
            return "Content shows high AI characteristics. Strong humanization needed."

    def get_detection_report(self, content: str) -> str:
        """
        Generate detailed detection report.

        Args:
            content: Content to analyze

        Returns:
            Formatted report
        """
        analysis = self.analyze_detection_risk(content)

        report = f"""
AI DETECTION ANALYSIS REPORT
{'=' * 50}

Overall Risk Score: {analysis['risk_score']:.1%}
Risk Level: {analysis['risk_level']}

DETAILED INDICATORS:
- Structure Formality: {analysis['indicators']['structure']:.1%}
- Phrase Repetition: {analysis['indicators']['repetition']:.1%}
- Excessive Formality: {analysis['indicators']['formality']:.1%}
- Lack of Contractions: {analysis['indicators']['contractions']:.1%}
- Transition Usage: {analysis['indicators']['transitions']:.1%}
- Sentence Variety: {analysis['indicators']['variety']:.1%}
- Human Elements: {analysis['indicators']['human_elements']:.1%}

RECOMMENDATION:
{analysis['recommendation']}

IMPORTANT:
This analysis is for educational purposes only. AI detection tools
are not perfect and can produce false positives/negatives. Using
this tool responsibly and with proper disclosure is essential.
{'=' * 50}
        """

        return report