"""
LLM Service for generating human-readable explanations of model predictions.

Uses Google Gemini to translate model-space evidence (heatmaps, attention maps)
into human-understandable hypotheses with proper hedging language.
"""

import json
import base64
from typing import Any, Dict, List, Optional
from functools import lru_cache

from app.core.config import get_settings
from app.core.logging import get_logger

# Module-level logger, obtained from the project's logging helper.
logger = get_logger(__name__)

# Technical metadata describing each detector to the LLM, keyed by model name.
#   "type"         - category label; the same labels are listed in SYSTEM_PROMPT.
#   "description"  - one-line summary of what the model analyzes.
#   "typical_cues" - example visual cues associated with this kind of model.
# LLMService copies these fields into evidence packets and single-model prompts;
# model names without an entry get an "unknown" placeholder there.
MODEL_TYPE_DESCRIPTIONS = {
    "cnn-transfer": {
        "type": "rgb_texture_cnn",
        "description": "Analyzes RGB pixel textures, colors, and fine details at multiple scales",
        "typical_cues": ["skin texture uniformity", "shading gradients", "fine detail at boundaries"]
    },
    "vit-base": {
        "type": "patch_consistency_vit",
        "description": "Analyzes global consistency and relationships between image patches",
        "typical_cues": ["lighting consistency", "background blur patterns", "patch-level coherence"]
    },
    "deit-distilled": {
        "type": "patch_consistency_vit",
        "description": "Analyzes global consistency with knowledge distillation for refined attention",
        "typical_cues": ["global-local consistency", "texture repetition", "depth coherence"]
    },
    "gradfield-cnn": {
        "type": "edge_coherence_cnn",
        "description": "Analyzes edge patterns, boundary sharpness, and gradient field coherence",
        "typical_cues": ["edge smoothness", "boundary naturalness", "gradient consistency"]
    }
}

# User-facing display information for each model (used in frontend), keyed by
# the same model names as MODEL_TYPE_DESCRIPTIONS.
#   "display_name" / "short_name" - labels for the model.
#   "method_name" / "method_description" - the explainability method behind
#       the heatmap; also interpolated into the single-model LLM prompt.
#   "educational_text"  - explanatory paragraph about the model.
#   "what_it_looks_for" - short bullet list of cues.
# Read through get_model_display_info(), which synthesizes an entry for names
# that are not listed here.
MODEL_DISPLAY_INFO = {
    "cnn-transfer": {
        "display_name": "Texture Analysis",
        "short_name": "CNN",
        "method_name": "Grad-CAM",
        "method_description": "Gradient-weighted Class Activation Mapping",
        "educational_text": (
            "This model examines fine-grained texture patterns and pixel-level details. "
            "The heatmap highlights regions where texture anomalies were detected. "
            "AI-generated images often have subtle texture inconsistencies - overly smooth skin, "
            "unnatural fabric patterns, or repetitive background textures that this model can detect."
        ),
        "what_it_looks_for": [
            "Skin texture uniformity vs natural variation",
            "Fine detail preservation at edges and boundaries",
            "Color gradient smoothness and shading realism"
        ]
    },
    "vit-base": {
        "display_name": "Patch Consistency",
        "short_name": "ViT",
        "method_name": "Attention Rollout",
        "method_description": "Aggregated attention across all transformer layers",
        "educational_text": (
            "This model analyzes how different parts of the image relate to each other. "
            "The heatmap shows which image patches drew the most attention. "
            "AI-generated images may have inconsistencies between regions - "
            "mismatched lighting, perspective errors, or elements that don't quite fit together."
        ),
        "what_it_looks_for": [
            "Consistency of lighting across the image",
            "Spatial relationships between objects",
            "Background-foreground coherence"
        ]
    },
    "deit-distilled": {
        "display_name": "Global Structure",
        "short_name": "DeiT",
        "method_name": "Attention Rollout",
        "method_description": "Distilled attention patterns from teacher model",
        "educational_text": (
            "This model uses knowledge distillation to detect global structural anomalies. "
            "The heatmap reveals areas where the overall image structure seems inconsistent. "
            "AI-generated images sometimes have subtle global issues - "
            "like depth inconsistencies or anatomical improbabilities."
        ),
        "what_it_looks_for": [
            "Global-to-local consistency",
            "Depth and perspective coherence",
            "Structural plausibility of objects"
        ]
    },
    "gradfield-cnn": {
        "display_name": "Edge Coherence",
        "short_name": "GradField",
        "method_name": "Gradient Field Analysis",
        "method_description": "Analysis of image gradient patterns and edge transitions",
        "educational_text": (
            "This model analyzes edge patterns and how colors transition across boundaries. "
            "The heatmap highlights areas with unusual edge characteristics. "
            "AI-generated images often have telltale edge artifacts - "
            "unnaturally sharp or blurry boundaries, inconsistent edge directions, or gradient anomalies."
        ),
        "what_it_looks_for": [
            "Edge sharpness consistency",
            "Natural boundary transitions",
            "Gradient flow coherence"
        ]
    }
}

def get_model_display_info(model_name: str) -> Dict[str, Any]:
    """Return the display metadata for ``model_name``.

    Models listed in MODEL_DISPLAY_INFO resolve to their stored entry. Any
    other name gets a generic entry derived from the name itself, so callers
    always receive the same set of keys.
    """
    # Derive human-readable labels from the raw identifier,
    # e.g. "my-model" -> "My Model" / "MY-".
    readable_name = model_name.replace("-", " ").title()
    abbreviation = model_name[:3].upper()

    generic_entry: Dict[str, Any] = dict(
        display_name=readable_name,
        short_name=abbreviation,
        method_name="Analysis",
        method_description="Model-specific analysis",
        educational_text=f"This model ({model_name}) analyzes the image for signs of AI generation.",
        what_it_looks_for=["Image anomalies", "Generation artifacts"],
    )
    return MODEL_DISPLAY_INFO.get(model_name, generic_entry)

# Instructions for the multi-model explanation request.
# LLMService.generate_explanation sends this string as the first element of
# `contents`, ahead of the image and text parts. It fixes the hedging rules and
# the JSON shape ("per_model_insights", "consensus_summary") that
# LLMService._parse_response later validates.
SYSTEM_PROMPT = """You are an AI image analysis interpreter for a deepfake detection system. Your role is to translate model evidence into human-understandable hypotheses.

CRITICAL RULES:
1. NEVER claim certainty. Always use hedging language: "may", "suggests", "possible", "could indicate", "might show"
2. ALWAYS cite which model's evidence supports each statement (e.g., "based on CNN heatmap focus")
3. If evidence is diffuse or unclear, say so explicitly: "Evidence is spread across the image; interpretation is less certain"
4. Provide user-checkable observations, not definitive claims about what IS fake
5. Remember: you are explaining what the MODEL focused on, not proving the image is fake

MODEL TYPES AND WHAT THEY ANALYZE:
- CNN (rgb_texture_cnn): Pixel textures, colors, fine details - looks for texture anomalies
- ViT/DeiT (patch_consistency_vit): Global consistency, patch relationships - looks for coherence issues
- GradField (edge_coherence_cnn): Edge patterns, boundaries, gradient fields - looks for edge artifacts

OUTPUT FORMAT:
You must respond with valid JSON matching this exact structure:
{
  "per_model_insights": {
    "<model_name>": {
      "what_model_relied_on": "One sentence describing the model's focus area",
      "possible_cues": ["Cue 1 with hedging (based on evidence)", "Cue 2...", "Cue 3..."],
      "confidence_note": "Note about confidence level"
    }
  },
  "consensus_summary": [
    "Bullet 1 about model agreement/disagreement",
    "Bullet 2 about overall evidence pattern"
  ]
}"""


class LLMService:
    """Service for generating LLM-powered explanations of model predictions.

    Wraps a google-genai client. If the client cannot be created (feature
    switched off in settings, package not installed, or construction fails)
    the service stays disabled and both ``generate_*`` methods return ``None``.
    """

    def __init__(self) -> None:
        """Set up empty state and try to create the Gemini client."""
        self._client = None
        self._model_name = None
        self._enabled = False
        self._initialize()

    def _initialize(self) -> None:
        """Initialize the Gemini client if the LLM feature is enabled in settings.

        Failures are logged and leave the service disabled; nothing is raised.
        """
        settings = get_settings()

        if not settings.llm_enabled:
            logger.info("LLM explanations disabled: No GOOGLE_API_KEY configured")
            return

        try:
            # Imported lazily so the module still loads without google-genai.
            from google import genai
            self._client = genai.Client(api_key=settings.GOOGLE_API_KEY)
            self._model_name = settings.GEMINI_MODEL
            self._enabled = True
            logger.info(f"LLM service initialized with model: {settings.GEMINI_MODEL}")
        except ImportError:
            logger.warning("google-genai package not installed. LLM explanations disabled.")
        except Exception as e:
            logger.error(f"Failed to initialize LLM service: {e}")

    @property
    def enabled(self) -> bool:
        """Check if LLM explanations are available."""
        return self._enabled

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _model_type_info(model_name: str) -> Dict[str, Any]:
        """Return the MODEL_TYPE_DESCRIPTIONS entry for a model, or an "unknown" placeholder."""
        info = MODEL_TYPE_DESCRIPTIONS.get(model_name)
        if info is None:
            # Fresh dict per call so callers never share the placeholder's list.
            info = {
                "type": "unknown",
                "description": "Unknown model type",
                "typical_cues": []
            }
        return info

    @staticmethod
    def _image_parts(image_b64: str, caption: str) -> List[Any]:
        """Return ``[image part, caption part]`` for one base64-encoded image.

        The image is always labelled ``image/png``.
        """
        from google.genai import types

        return [
            types.Part.from_bytes(
                data=base64.b64decode(image_b64),
                mime_type="image/png"
            ),
            types.Part.from_text(text=caption),
        ]

    @staticmethod
    def _strip_code_fence(response_text: str) -> str:
        """Remove a surrounding markdown code fence (```...```) if present."""
        text = response_text.strip()
        if not text.startswith("```"):
            return text
        lines = text.split("\n")
        # Drop the opening fence line, and the closing one when it exists.
        body = lines[1:-1] if lines[-1].strip() == "```" else lines[1:]
        return "\n".join(body).strip()

    @staticmethod
    def _fallback_explanation(expected_models: List[str]) -> Dict[str, Any]:
        """Build the generic multi-model result used when the LLM reply is unusable."""
        return {
            "per_model_insights": {
                model: {
                    "what_model_relied_on": f"The {model} model analyzed the image.",
                    "possible_cues": ["Unable to generate detailed explanation."],
                    "confidence_note": "LLM response parsing failed."
                }
                for model in expected_models
            },
            "consensus_summary": ["Model analysis completed but detailed explanation unavailable."]
        }

    # ------------------------------------------------------------------
    # Multi-model explanation
    # ------------------------------------------------------------------

    def build_evidence_packet(
        self,
        model_name: str,
        model_output: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Build a structured evidence packet from model output.

        Args:
            model_name: Name of the model (e.g., "cnn-transfer")
            model_output: Raw output from the model's predict() method

        Returns:
            Structured evidence packet for LLM consumption. Keys missing from
            ``model_output`` are filled with neutral defaults.
        """
        model_info = self._model_type_info(model_name)

        return {
            "model_name": model_name,
            "model_type": model_info["type"],
            "model_description": model_info["description"],
            "prob_fake": model_output.get("prob_fake", 0.0),
            "prediction": model_output.get("pred", "unknown"),
            "focus_summary": model_output.get("focus_summary", "focus pattern not available"),
            "explainability_type": model_output.get("explainability_type", "unknown"),
            "typical_cues_for_this_model": model_info["typical_cues"]
        }

    def generate_explanation(
        self,
        original_image_b64: Optional[str],
        submodel_outputs: Dict[str, Dict[str, Any]],
        include_images: bool = True
    ) -> Optional[Dict[str, Any]]:
        """
        Generate LLM explanation for model predictions.

        Args:
            original_image_b64: Base64-encoded original image (optional)
            submodel_outputs: Dict mapping model names to their outputs
            include_images: Whether to include images in the prompt (uses vision model)

        Returns:
            ExplanationResult dict, or None if the service is disabled, there
            are no model outputs, or the request fails.
        """
        if not self._enabled:
            logger.warning("LLM explanations requested but service not enabled")
            return None

        if not submodel_outputs:
            # Nothing to explain; skip the API call entirely.
            logger.warning("LLM explanation requested without any model outputs")
            return None

        try:
            from google.genai import types

            # Build evidence packets for all models
            evidence_packets = {
                model_name: self.build_evidence_packet(model_name, output)
                for model_name, output in submodel_outputs.items()
            }
            user_prompt = self._build_user_prompt(evidence_packets, submodel_outputs)

            # Images first (each followed by a caption), then the main prompt.
            parts: List[Any] = []
            if include_images:
                if original_image_b64:
                    parts.extend(self._image_parts(
                        original_image_b64,
                        "Original image shown above.\n\n"
                    ))
                for model_name, output in submodel_outputs.items():
                    heatmap_b64 = output.get("heatmap_base64")
                    if heatmap_b64:
                        parts.extend(self._image_parts(
                            heatmap_b64,
                            f"Heatmap overlay for {model_name} shown above.\n\n"
                        ))
            parts.append(types.Part.from_text(text=str(user_prompt)))

            logger.info("Generating LLM explanation...")
            # NOTE(review): the system prompt is sent as the first content
            # element rather than via GenerateContentConfig.system_instruction.
            response = self._client.models.generate_content(
                model=self._model_name,
                contents=[SYSTEM_PROMPT] + parts,
                config=types.GenerateContentConfig(
                    temperature=0.3,
                    top_p=0.8,
                    max_output_tokens=2048,
                )
            )

            return self._parse_response(response.text, list(submodel_outputs.keys()))

        except Exception as e:
            # Best-effort feature: any failure degrades to "no explanation".
            logger.error(f"Failed to generate LLM explanation: {e}")
            return None

    def _build_user_prompt(
        self,
        evidence_packets: Dict[str, Dict],
        submodel_outputs: Dict[str, Dict]
    ) -> str:
        """Build the user prompt with evidence data.

        Args:
            evidence_packets: Packets from build_evidence_packet, keyed by model name.
            submodel_outputs: Raw model outputs (currently unused; kept for
                interface compatibility).

        Returns:
            The prompt text. With no packets, the aggregate figures are
            reported as 0 instead of raising.
        """
        prob_fakes = [packet["prob_fake"] for packet in evidence_packets.values()]
        if prob_fakes:
            avg_prob = sum(prob_fakes) / len(prob_fakes)
            lowest, highest = min(prob_fakes), max(prob_fakes)
        else:
            # min()/max() raise on an empty sequence; use neutral values.
            avg_prob = lowest = highest = 0.0

        agreement = "Models generally agree" if highest - lowest < 0.3 else "Models show disagreement"

        prompt = f"""I have {len(evidence_packets)} deepfake detection models analyzing an image.

EVIDENCE FROM EACH MODEL:
{json.dumps(evidence_packets, indent=2)}

AGGREGATE ANALYSIS:
- Average fake probability: {avg_prob:.1%}
- Model agreement: {agreement}
- Probability range: {lowest:.1%} to {highest:.1%}

TASK:
For each model, provide:
1. "what_model_relied_on": One sentence describing where the model focused (cite the focus_summary)
2. "possible_cues": 2-4 possible visual cues a human could check, phrased as hypotheses with hedging language
3. "confidence_note": Assessment based on prob_fake value and focus pattern

Then provide "consensus_summary": 2-3 bullets about where models agreed/disagreed and overall evidence quality.

Remember: Use hedging language ("may", "suggests", "possible"). Never claim certainty.

Respond with valid JSON only, no markdown formatting."""

        return prompt

    def _parse_response(
        self,
        response_text: str,
        expected_models: List[str]
    ) -> Optional[Dict[str, Any]]:
        """Parse and validate the LLM response.

        Args:
            response_text: Raw text returned by the LLM, optionally wrapped in
                a markdown code fence.
            expected_models: Model names that must appear in ``per_model_insights``.

        Returns:
            The decoded JSON object with missing sections and models filled
            with defaults, or a generic fallback result when the text is not a
            JSON object.
        """
        try:
            result = json.loads(self._strip_code_fence(response_text))
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse LLM response as JSON: {e}")
            logger.debug(f"Raw response: {response_text[:500]}...")
            return self._fallback_explanation(expected_models)

        if not isinstance(result, dict):
            # Valid JSON of the wrong shape (list, string, ...) is as unusable
            # as invalid JSON, so it takes the same fallback.
            logger.error(f"LLM response is not a JSON object (got {type(result).__name__})")
            return self._fallback_explanation(expected_models)

        # Validate structure
        if "per_model_insights" not in result:
            logger.warning("LLM response missing per_model_insights")
            result["per_model_insights"] = {}
        elif not isinstance(result["per_model_insights"], dict):
            logger.warning("LLM response per_model_insights is not an object; discarding it")
            result["per_model_insights"] = {}

        if "consensus_summary" not in result:
            logger.warning("LLM response missing consensus_summary")
            result["consensus_summary"] = ["Model analysis completed."]

        # Ensure all expected models have entries (fill with defaults if missing)
        insights = result["per_model_insights"]
        for model_name in expected_models:
            if model_name not in insights:
                insights[model_name] = {
                    "what_model_relied_on": f"The {model_name} model analyzed the image.",
                    "possible_cues": ["Evidence details not available for this model."],
                    "confidence_note": "Unable to generate detailed analysis."
                }

        return result

    # ------------------------------------------------------------------
    # Single-model explanation
    # ------------------------------------------------------------------

    @staticmethod
    def _build_single_model_prompt(
        display_info: Dict[str, Any],
        model_type_info: Dict[str, Any],
        prob_fake: float,
        focus_summary: Optional[str],
        contribution_percentage: Optional[float]
    ) -> str:
        """Build the prompt text for a single-model explanation request."""
        typical_cues = ', '.join(model_type_info['typical_cues'])
        prediction = "Likely AI-Generated" if prob_fake >= 0.5 else "Likely Real"
        focus = focus_summary or "Not available"
        # Compare against None so a genuine 0% contribution is still reported.
        if contribution_percentage is not None:
            contribution_line = f"- Contribution to Final Decision: {contribution_percentage:.1f}%"
        else:
            contribution_line = ""

        return f"""You are analyzing a single model's output from a deepfake detection system.

MODEL INFORMATION:
- Display Name: {display_info['display_name']}
- Analysis Method: {display_info['method_name']} ({display_info['method_description']})
- What It Analyzes: {model_type_info['description']}
- Typical Cues It Detects: {typical_cues}

DETECTION RESULTS:
- Fake Probability: {prob_fake:.1%}
- Prediction: {prediction}
- Focus Summary: {focus}
{contribution_line}

The heatmap shows where this model focused its attention. Brighter/warmer colors indicate higher attention.

TASK:
Analyze the image and heatmap to explain what this specific model detected. Provide:
1. A clear explanation of what the model focused on and why it might indicate AI generation (or authenticity)
2. 2-4 specific visual cues a human could verify, phrased as hypotheses with hedging language
3. A confidence assessment based on the probability and focus pattern

CRITICAL: Use hedging language - "may", "suggests", "possible", "could indicate". Never claim certainty.

Respond with valid JSON matching this exact structure:
{{
  "key_finding": "One sentence main finding about what the model detected",
  "what_model_saw": "2-3 sentences explaining what the model detected and why it matters",
  "important_regions": ["Region 1 with hedging language", "Region 2...", "Region 3..."],
  "confidence_qualifier": "Assessment of reliability with appropriate hedging"
}}

Respond with valid JSON only, no markdown formatting."""

    @staticmethod
    def _single_model_fallback(
        model_name: str,
        display_name: str,
        prob_fake: float
    ) -> Dict[str, Any]:
        """Build the generic single-model result used when the LLM reply is unusable."""
        return {
            "model_name": model_name,
            # Neutral wording: the reply was unreadable, so no finding is
            # asserted in either direction.
            "key_finding": f"The {display_name} model analyzed the image, but a detailed finding is unavailable.",
            "what_model_saw": f"The model analyzed the image but detailed analysis could not be parsed. The fake probability was {prob_fake:.1%}.",
            "important_regions": ["Unable to identify specific regions."],
            "confidence_qualifier": "Analysis completed but detailed explanation unavailable due to parsing error."
        }

    @staticmethod
    def _loads_with_repair(text: str) -> Any:
        """Decode JSON, retrying leniently when strict decoding fails.

        The retry uses ``strict=False``, which accepts raw control characters
        (such as literal newlines) inside string values.

        Raises:
            json.JSONDecodeError: if the lenient retry fails as well.
        """
        try:
            return json.loads(text)
        except json.JSONDecodeError as parse_err:
            # Log the problematic text for debugging
            logger.warning(f"Initial JSON parse failed: {parse_err}")
            logger.warning(f"Raw text (first 500 chars): {repr(text[:500])}")
            return json.loads(text, strict=False)

    def generate_single_model_explanation(
        self,
        model_name: str,
        prob_fake: float,
        original_image_b64: Optional[str] = None,
        heatmap_b64: Optional[str] = None,
        focus_summary: Optional[str] = None,
        contribution_percentage: Optional[float] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Generate LLM explanation for a single model's prediction.

        This is more token-efficient than generating all explanations at once,
        and allows users to request explanations on-demand per model.

        Args:
            model_name: Name of the model (e.g., "cnn-transfer")
            prob_fake: The model's fake probability
            original_image_b64: Base64-encoded original image
            heatmap_b64: Base64-encoded heatmap overlay
            focus_summary: Text summary of where model focused
            contribution_percentage: How much this model contributed to fusion decision

        Returns:
            Dict with insight for this model (always including "model_name").
            A generic fallback dict is returned when the reply is not a JSON
            object; None when the service is disabled or the request fails.
        """
        if not self._enabled:
            logger.warning("LLM explanations requested but service not enabled")
            return None

        try:
            from google.genai import types

            display_info = get_model_display_info(model_name)
            model_type_info = self._model_type_info(model_name)

            prompt = self._build_single_model_prompt(
                display_info,
                model_type_info,
                prob_fake,
                focus_summary,
                contribution_percentage
            )

            # Images first (each followed by a caption), then the prompt.
            content_parts: List[Any] = []
            if original_image_b64:
                content_parts.extend(self._image_parts(
                    original_image_b64,
                    "Original image shown above.\n\n"
                ))
            if heatmap_b64:
                content_parts.extend(self._image_parts(
                    heatmap_b64,
                    f"{display_info['method_name']} heatmap shown above.\n\n"
                ))
            content_parts.append(types.Part.from_text(text=prompt))

            # Call the LLM with JSON response mode
            logger.info(f"Generating LLM explanation for {model_name}...")

            response = self._client.models.generate_content(
                model=self._model_name,
                contents=content_parts,
                config=types.GenerateContentConfig(
                    temperature=0.3,
                    top_p=0.8,
                    max_output_tokens=2048,  # Increased to avoid truncation
                    response_mime_type="application/json",
                )
            )

            # Even with JSON mode the reply can be malformed.
            text = response.text.strip()
            try:
                result = self._loads_with_repair(text)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse single model LLM response: {e}")
                return self._single_model_fallback(
                    model_name, display_info['display_name'], prob_fake
                )

            if not isinstance(result, dict):
                logger.error(
                    f"Single model LLM response is not a JSON object (got {type(result).__name__})"
                )
                return self._single_model_fallback(
                    model_name, display_info['display_name'], prob_fake
                )

            # Add model metadata to result
            result["model_name"] = model_name
            return result

        except Exception as e:
            # Best-effort feature: any other failure degrades to "no explanation".
            logger.error(f"Failed to generate single model explanation: {e}")
            return None


# Module-wide LLMService instance; stays None until the first request for it.
_llm_service: Optional[LLMService] = None


def get_llm_service() -> LLMService:
    """Return the shared LLMService, constructing it on the first call."""
    global _llm_service
    if _llm_service is not None:
        return _llm_service
    _llm_service = LLMService()
    return _llm_service