"""
Response analyzer for population query results

Analyzes responses to extract positions, sentiment, themes, and statistics.
"""

import re
from typing import List, Dict, Optional, Tuple
from collections import Counter
from dataclasses import dataclass
from enum import Enum

from .sampler import PopulationResponse


class Position(str, Enum):
    """Position (stance) a response takes on a proposal/question.

    The ``str`` mixin makes each member compare equal to its string value;
    ``ResponseAnalyzer._count_positions`` uses ``.value`` as the tally key.
    """
    STRONGLY_SUPPORT = "strongly_support"  # support wins with a score of 5 or more
    SUPPORT = "support"  # support outscores both oppose and neutral
    NEUTRAL = "neutral"  # neither side wins and a neutral keyword is present
    OPPOSE = "oppose"  # oppose outscores both support and neutral
    STRONGLY_OPPOSE = "strongly_oppose"  # oppose wins with a score of 5 or more
    UNCLEAR = "unclear"  # nothing matched, or support/oppose tie with no neutral keyword


class Sentiment(str, Enum):
    """Overall emotional tone of a response.

    The ``str`` mixin makes each member compare equal to its string value;
    ``ResponseAnalyzer._count_sentiments`` uses ``.value`` as the tally key.
    """
    VERY_POSITIVE = "very_positive"  # three or more positive keywords, no negative ones
    POSITIVE = "positive"  # one or two positive keywords, no negative ones
    NEUTRAL = "neutral"  # no sentiment keyword found
    NEGATIVE = "negative"  # one or two negative keywords, no positive ones
    VERY_NEGATIVE = "very_negative"  # three or more negative keywords, no positive ones
    MIXED = "mixed"  # at least one positive and one negative keyword


@dataclass
class ResponseAnalysis:
    """Analysis of a single response.

    Built by ``ResponseAnalyzer.analyze_single_response``.
    """
    variant_id: int  # copied from the analyzed response's ``variant_id``
    position: Position  # detected stance on the question
    sentiment: Sentiment  # detected overall tone
    confidence_score: float  # 0-1; the winning position's share of the total score
    key_themes: List[str]  # keys of ResponseAnalyzer.THEME_KEYWORDS found in the text
    word_count: int  # number of whitespace-separated words in the response


@dataclass
class PopulationResults:
    """Complete analysis results for a population query.

    Built by ``ResponseAnalyzer.analyze_population`` from a list of responses.
    """

    # Basic info
    question: str  # question of the first response ("" when there are none)
    population_size: int  # number of responses analyzed
    base_persona_name: str  # first response's persona name up to its first "_"

    # Individual analyses, one per response, in input order
    individual_analyses: List[ResponseAnalysis]

    # Position distribution, keyed by Position value; only positions that
    # actually occur are present. Percentages are on a 0-100 scale.
    position_counts: Dict[str, int]
    position_percentages: Dict[str, float]

    # Sentiment distribution, keyed by Sentiment value; same conventions as
    # the position distribution.
    sentiment_counts: Dict[str, int]
    sentiment_percentages: Dict[str, float]

    # Common themes
    top_themes: List[Tuple[str, int]]  # (theme, count), most frequent first
    theme_clusters: Dict[str, List[int]]  # theme -> variant_ids

    # Statistics, measured in whitespace-separated words per response
    average_response_length: float
    median_response_length: float
    response_length_range: Tuple[int, int]  # (shortest, longest)

    # Sample responses: raw text of the first response found for each stance,
    # or None when no response was classified that way
    sample_support: Optional[str] = None
    sample_oppose: Optional[str] = None
    sample_neutral: Optional[str] = None


class ResponseAnalyzer:
    """Analyze population query responses.

    Classification is purely keyword driven: the class-level tables below are
    looked up in the lower-cased response text to derive a position, a
    sentiment and a list of themes for every response.
    """

    # Keywords for position detection. All entries are lower case; a few are
    # multi-word phrases ("good idea", "in favor").
    SUPPORT_KEYWORDS = [
        "support", "agree", "favor", "approve", "endorse", "like", "positive",
        "good idea", "strongly support", "in favor", "beneficial", "excited"
    ]

    # Signals of opposition. "concerned" and "worried" are also listed in
    # NEGATIVE_SENTIMENT, so they influence both position and sentiment.
    OPPOSE_KEYWORDS = [
        "oppose", "disagree", "against", "reject", "disapprove", "don't support",
        "bad idea", "strongly oppose", "concerned", "worried", "problematic"
    ]

    # Signals of an undecided or conditional stance.
    NEUTRAL_KEYWORDS = [
        "neutral", "mixed feelings", "depends", "conditional", "both sides",
        "need more information", "unclear", "unsure"
    ]

    # Keywords for sentiment
    POSITIVE_SENTIMENT = [
        "great", "excellent", "wonderful", "fantastic", "love", "brilliant",
        "perfect", "amazing", "thrilled", "optimistic", "hopeful"
    ]

    NEGATIVE_SENTIMENT = [
        "terrible", "awful", "horrible", "disaster", "hate", "concerned",
        "worried", "fearful", "pessimistic", "unfortunate"
    ]

    # Common urban planning themes: theme label -> trigger keywords. A theme
    # is assigned to a response when any one of its keywords is found.
    THEME_KEYWORDS = {
        "affordability": ["affordable", "cost", "expensive", "price", "rent"],
        "sustainability": ["sustainable", "green", "environment", "climate", "carbon"],
        "equity": ["equity", "justice", "fair", "displacement", "gentrification"],
        "density": ["density", "crowded", "compact", "units"],
        "transit": ["transit", "bus", "train", "transportation", "commute"],
        "parking": ["parking", "cars", "vehicles", "garage"],
        "safety": ["safety", "crime", "secure", "dangerous"],
        "community": ["community", "neighbors", "neighborhood", "local"],
        "business": ["business", "economic", "commerce", "jobs", "employment"],
        "housing": ["housing", "homes", "residential", "apartments"],
    }

    def analyze_population(
        self,
        responses: List[PopulationResponse]
    ) -> PopulationResults:
        """
        Analyze all responses from a population query

        Args:
            responses: List of PopulationResponse objects

        Returns:
            PopulationResults with complete analysis
        """
        # Analyze each response individually
        individual_analyses = [
            self.analyze_single_response(r) for r in responses
        ]

        # Calculate distributions
        position_counts = self._count_positions(individual_analyses)
        position_percentages = self._calc_percentages(
            position_counts, len(responses)
        )

        sentiment_counts = self._count_sentiments(individual_analyses)
        sentiment_percentages = self._calc_percentages(
            sentiment_counts, len(responses)
        )

        # Extract themes
        top_themes = self._extract_top_themes(responses, top_n=10)
        theme_clusters = self._cluster_by_themes(responses, individual_analyses)

        # Calculate statistics
        lengths = [len(r.response.split()) for r in responses]
        avg_length = sum(lengths) / len(lengths) if lengths else 0
        sorted_lengths = sorted(lengths)
        median_length = sorted_lengths[len(sorted_lengths) // 2] if sorted_lengths else 0

        # Get sample responses
        samples = self._get_sample_responses(responses, individual_analyses)

        return PopulationResults(
            question=responses[0].question if responses else "",
            population_size=len(responses),
            base_persona_name=responses[0].persona.name.split("_")[0] if responses else "",
            individual_analyses=individual_analyses,
            position_counts=position_counts,
            position_percentages=position_percentages,
            sentiment_counts=sentiment_counts,
            sentiment_percentages=sentiment_percentages,
            top_themes=top_themes,
            theme_clusters=theme_clusters,
            average_response_length=avg_length,
            median_response_length=median_length,
            response_length_range=(min(lengths) if lengths else 0, max(lengths) if lengths else 0),
            sample_support=samples.get("support"),
            sample_oppose=samples.get("oppose"),
            sample_neutral=samples.get("neutral"),
        )

    def analyze_single_response(
        self,
        response: PopulationResponse
    ) -> ResponseAnalysis:
        """Analyze a single response"""
        text = response.response.lower()

        # Detect position
        position, confidence = self._detect_position(text)

        # Detect sentiment
        sentiment = self._detect_sentiment(text)

        # Extract key themes
        themes = self._extract_themes(text)

        # Word count
        word_count = len(response.response.split())

        return ResponseAnalysis(
            variant_id=response.variant_id,
            position=position,
            sentiment=sentiment,
            confidence_score=confidence,
            key_themes=themes,
            word_count=word_count
        )

    def _detect_position(self, text: str) -> Tuple[Position, float]:
        """Detect position from text with confidence score - improved accuracy"""

        # Strong indicators - look at first and last sentences (where positions are usually stated)
        sentences = text.split('.')
        first_sentence = sentences[0].lower() if sentences else ""
        last_sentence = sentences[-1].lower() if len(sentences) > 1 else ""

        # Check for clear positive statements in key positions
        strong_support_phrases = [
            "i support", "i agree", "i approve", "i favor", "i endorse",
            "strongly support", "strongly agree", "in favor of",
            "this is a good", "this is beneficial", "i'm excited"
        ]
        strong_oppose_phrases = [
            "i oppose", "i disagree", "i reject", "i'm against",
            "strongly oppose", "strongly disagree", "i cannot support",
            "i don't support", "i can't support", "this is a bad",
            "i'm concerned", "i'm worried", "i must oppose"
        ]

        # Check first and last sentences for strong indicators (weighted heavily)
        first_last_text = first_sentence + " " + last_sentence
        support_score = 0
        oppose_score = 0

        for phrase in strong_support_phrases:
            if phrase in first_last_text:
                support_score += 3  # Strong weight for clear statements

        for phrase in strong_oppose_phrases:
            if phrase in first_last_text:
                oppose_score += 3  # Strong weight for clear statements

        # Count keyword matches in full text (lower weight)
        support_count = sum(
            1 for keyword in self.SUPPORT_KEYWORDS
            if keyword in text and not any(neg in text for neg in ["don't " + keyword, "can't " + keyword, "won't " + keyword])
        )
        oppose_count = sum(
            1 for keyword in self.OPPOSE_KEYWORDS
            if keyword in text
        )
        neutral_count = sum(
            1 for keyword in self.NEUTRAL_KEYWORDS
            if keyword in text
        )

        # Combine scores
        support_score += support_count
        oppose_score += oppose_count

        total_score = support_score + oppose_score + neutral_count

        if total_score == 0:
            return Position.UNCLEAR, 0.0

        # Determine dominant position
        if support_score > oppose_score and support_score > neutral_count:
            confidence = support_score / max(total_score, 1)
            if support_score >= 5:
                return Position.STRONGLY_SUPPORT, min(confidence, 1.0)
            return Position.SUPPORT, min(confidence, 1.0)

        elif oppose_score > support_score and oppose_score > neutral_count:
            confidence = oppose_score / max(total_score, 1)
            if oppose_score >= 5:
                return Position.STRONGLY_OPPOSE, min(confidence, 1.0)
            return Position.OPPOSE, min(confidence, 1.0)

        elif neutral_count > 0:
            confidence = neutral_count / max(total_score, 1)
            return Position.NEUTRAL, min(confidence, 1.0)

        return Position.UNCLEAR, 0.3

    def _detect_sentiment(self, text: str) -> Sentiment:
        """Detect overall sentiment"""
        positive_count = sum(
            1 for keyword in self.POSITIVE_SENTIMENT
            if keyword in text
        )
        negative_count = sum(
            1 for keyword in self.NEGATIVE_SENTIMENT
            if keyword in text
        )

        if positive_count > 0 and negative_count > 0:
            return Sentiment.MIXED

        if positive_count >= 3:
            return Sentiment.VERY_POSITIVE
        elif positive_count >= 1:
            return Sentiment.POSITIVE

        if negative_count >= 3:
            return Sentiment.VERY_NEGATIVE
        elif negative_count >= 1:
            return Sentiment.NEGATIVE

        return Sentiment.NEUTRAL

    def _extract_themes(self, text: str) -> List[str]:
        """Extract key themes from text"""
        themes = []
        for theme, keywords in self.THEME_KEYWORDS.items():
            if any(keyword in text for keyword in keywords):
                themes.append(theme)
        return themes

    def _count_positions(
        self,
        analyses: List[ResponseAnalysis]
    ) -> Dict[str, int]:
        """Count position occurrences"""
        return dict(Counter(a.position.value for a in analyses))

    def _count_sentiments(
        self,
        analyses: List[ResponseAnalysis]
    ) -> Dict[str, int]:
        """Count sentiment occurrences"""
        return dict(Counter(a.sentiment.value for a in analyses))

    def _calc_percentages(
        self,
        counts: Dict[str, int],
        total: int
    ) -> Dict[str, float]:
        """Calculate percentages from counts"""
        return {
            key: (count / total * 100) if total > 0 else 0
            for key, count in counts.items()
        }

    def _extract_top_themes(
        self,
        responses: List[PopulationResponse],
        top_n: int = 10
    ) -> List[Tuple[str, int]]:
        """Extract most common themes across all responses"""
        all_themes = []
        for response in responses:
            themes = self._extract_themes(response.response.lower())
            all_themes.extend(themes)

        theme_counts = Counter(all_themes)
        return theme_counts.most_common(top_n)

    def _cluster_by_themes(
        self,
        responses: List[PopulationResponse],
        analyses: List[ResponseAnalysis]
    ) -> Dict[str, List[int]]:
        """Group variant IDs by their key themes"""
        clusters = {}
        for analysis in analyses:
            for theme in analysis.key_themes:
                if theme not in clusters:
                    clusters[theme] = []
                clusters[theme].append(analysis.variant_id)
        return clusters

    def _get_sample_responses(
        self,
        responses: List[PopulationResponse],
        analyses: List[ResponseAnalysis]
    ) -> Dict[str, str]:
        """Get sample responses for each position"""
        samples = {}

        # Find first response of each type
        for response, analysis in zip(responses, analyses):
            if "support" not in samples and analysis.position in [Position.SUPPORT, Position.STRONGLY_SUPPORT]:
                samples["support"] = response.response
            elif "oppose" not in samples and analysis.position in [Position.OPPOSE, Position.STRONGLY_OPPOSE]:
                samples["oppose"] = response.response
            elif "neutral" not in samples and analysis.position == Position.NEUTRAL:
                samples["neutral"] = response.response

        return samples