"""
Knowledge Universe — Coverage Confidence Score (Blend Mode — John's Optimization)
==================================================================================

JOHN'S OPTIMIZATION:
  Added compute_from_embeddings() — accepts pre-computed embeddings from the
  reranker so we never call model.encode() twice in the same request.

  Before: 2x model.encode() = ~800ms overhead
  After:  1x model.encode() + cosine similarity = ~10ms overhead

  The API is unchanged. compute() still works for standalone use.
  compute_from_embeddings() is the fast path used by request_handler.
"""

import logging
from typing import List, Dict, Any, Optional

logger = logging.getLogger(__name__)

# Confidence thresholds applied by CoverageConfidenceScorer._build_response():
# a score >= _HIGH_CONFIDENCE is labelled "high", a score >= _MEDIUM_CONFIDENCE
# is labelled "medium", and anything lower is labelled "low" (with a warning).
_HIGH_CONFIDENCE   = 0.65
_MEDIUM_CONFIDENCE = 0.45

# Lowercase acronym -> spelled-out phrase.
# _suggest_queries() looks up each whitespace-separated word of the lowercased
# query in this table and, on a hit, suggests the query with the acronym
# expanded. Keys must therefore be lowercase single words.
_ACRONYM_EXPANSIONS = {
    "rlhf":    "reinforcement learning from human feedback",
    "llm":     "large language model",
    "rag":     "retrieval augmented generation",
    "nlp":     "natural language processing",
    "gnn":     "graph neural network",
    "cnn":     "convolutional neural network",
    "rnn":     "recurrent neural network",
    "lstm":    "long short-term memory",
    "bert":    "bidirectional encoder representations transformers",
    "gpt":     "generative pre-trained transformer",
    "vae":     "variational autoencoder",
    "gan":     "generative adversarial network",
    "moe":     "mixture of experts",
    "peft":    "parameter efficient fine tuning",
    "lora":    "low rank adaptation",
    "dpo":     "direct preference optimization",
    "sft":     "supervised fine tuning",
    "mlops":   "machine learning operations",
}

# Hand-written refinements keyed by a lowercase phrase.
# _suggest_queries() consults this table first: the first key (in definition
# order) that occurs as a substring of the lowercased query contributes its
# whole list, and no further keys are checked.
_QUERY_REFINEMENTS = {
    "transformer architecture": [
        "attention mechanism self-attention explained",
        "transformer encoder decoder tutorial",
        "attention is all you need paper explained",
    ],
    "machine learning": [
        "machine learning beginner tutorial python",
        "supervised learning algorithms explained",
        "machine learning fundamentals course",
    ],
    "deep learning": [
        "neural network backpropagation explained",
        "deep learning pytorch tutorial beginner",
        "convolutional neural network image classification",
    ],    
    "rlhf": [
        "reinforcement learning from human feedback explained",
        "reward model training language model",
        "RLHF PPO implementation tutorial",
    ],
    "mixture of experts": [
        "sparse mixture of experts architecture explained",
        "MoE gating network routing mechanism",
        "mixture of experts transformer tutorial",
    ],
}

# Domain keyword group -> suggestion templates.
# _suggest_queries() consults this table only when no _QUERY_REFINEMENTS key
# matched. A group matches when ANY of its keywords is a substring of the
# lowercased query; only the first matching group (in definition order) is
# used, and each template's {topic} placeholder is filled with the value
# returned by _extract_topic().
#
# NOTE(review): because matching is by substring and first match wins:
#   * "quantum" appears in both the physics/quantum group and the materials
#     science group, so a query containing "quantum" always resolves to the
#     earlier physics/quantum group.
#   * short keywords can match inside unrelated words (e.g. "rag" inside
#     "storage") — confirm this is acceptable before adding more short keys.
DOMAIN_QUERY_PATTERNS = {
    # Physics/quantum — suggest arXiv-specific terms
    ("quantum", "entanglement", "qubit", "hamiltonian", "decoherence"): [
        "{topic} arxiv review",
        "{topic} lecture notes pdf",
        "{topic} error analysis",
    ],
    # ML frameworks — suggest code and docs
    ("langchain", "llamaindex", "langgraph", "haystack", "dspy"): [
        "{topic} github example",
        "{topic} documentation tutorial",
        "{topic} cookbook python",
    ],
    # RAG/retrieval — suggest implementation
    ("rag", "retrieval", "vector", "embedding", "chunking"): [
        "{topic} implementation guide",
        "{topic} evaluation metrics",
        "production {topic} architecture",
    ],
    # Finance/trading
    ("fintech", "trading", "portfolio", "risk", "quant"): [
        "{topic} research paper 2025",
        "{topic} python implementation",
        "{topic} backtesting guide",
    ],
    # Deep learning techniques
    ("transformer", "attention", "bert", "gpt", "llm", "fine-tuning"): [
        "{topic} paper explained",
        "{topic} implementation pytorch",
        "{topic} benchmark comparison",
    ],
    # MLOps/deployment
    ("mlops", "deployment", "inference", "serving", "vllm"): [
        "{topic} production guide",
        "{topic} kubernetes setup",
        "{topic} optimization tutorial",
    ],
    # Materials science / physics
    ("superconductor", "lk-99", "quantum", "material", "semiconductor",
     "perovskite", "graphene", "topological"): [
        "{topic} arxiv preprint 2024",
        "{topic} experimental results review",
        "{topic} physics mechanism explained",
    ],
    
    # Neuroscience / biology
    ("neural", "neuron", "brain", "cortex", "synapse"): [
        "{topic} research paper",
        "{topic} computational model",
        "{topic} biological mechanism",
    ],
}

class CoverageConfidenceScorer:
    """
    Computes how well returned results match the query intent.

    Fast path: compute_from_embeddings() — reuses embeddings from reranker.
    Slow path: compute() — encodes from scratch (standalone use only).

    Both entry points return a dict with the same six keys:
    ``confidence``, ``confidence_label``, ``coverage_warning``,
    ``warning_message``, ``suggested_queries`` and
    ``top_result_similarities``.
    """

    def __init__(self):
        # Shared embedding model; resolved lazily by _get_model().
        self._model = None

    def _get_model(self):
        """
        Return the shared embedding model, or None if it cannot be obtained.

        The model is fetched on first use and cached on the instance. A failed
        lookup is logged and NOT cached, so the next call tries again.
        """
        if self._model is None:
            try:
                from src.integrations.shared_model import get_shared_model
                self._model = get_shared_model()
            except Exception as e:
                logger.error("Failed to get shared model: %s", e)
                return None
        return self._model

    def compute_from_embeddings(
        self,
        query: str,
        sources: List[Dict[str, Any]],
        query_emb,
        doc_embs,
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """
        FAST PATH — John's optimization.

        Uses pre-computed embeddings from the reranker, so no additional
        model.encode() call is made here; only cosine similarity is computed.

        Args:
            query:     Original query string
            sources:   Result dicts (same order as doc_embs)
            query_emb: Tensor from reranker's model.encode(query)
            doc_embs:  Tensor from reranker's model.encode(texts)
            top_k:     How many results to score

        Returns:
            The "no results" response when ``sources`` is empty; the
            "unavailable" response when an embedding is missing, nothing can
            be scored, or scoring raises; otherwise the scored response from
            _build_response().
        """
        if not sources:
            return self._no_results_response(query)
        if query_emb is None or doc_embs is None:
            return self._unavailable_response()

        try:
            from sentence_transformers import util

            top_sources = sources[:top_k]
            if not top_sources:
                # Nothing selected for scoring (e.g. top_k == 0).
                return self._unavailable_response()

            # doc_embs may hold more rows than we score; keep exactly one row
            # per scored source so scores and titles stay paired.
            top_embs = doc_embs[:len(top_sources)]

            sims = util.cos_sim(query_emb, top_embs)[0]
            # Clamp negatives to 0 so they cannot pull the average below zero.
            sim_scores = [max(0.0, float(s)) for s in sims]
            if not sim_scores:
                return self._unavailable_response()

            confidence = self._weighted_confidence(sim_scores)
            per_result = self._per_result_entries(top_sources, sim_scores)
            return self._build_response(query, confidence, per_result)

        except Exception as e:
            logger.error("Fast confidence scoring failed: %s", e)
            return self._unavailable_response()

    @staticmethod
    def _weighted_confidence(sim_scores: List[float]) -> float:
        """
        Return the rank-weighted mean of ``sim_scores``, rounded to 3 places.

        The score at position i (0-based) gets weight 1 / (i + 1), so top
        results count more. An empty list yields 0.0.
        """
        if not sim_scores:
            return 0.0
        weights = [1.0 / rank for rank in range(1, len(sim_scores) + 1)]
        weighted_sum = sum(s * w for s, w in zip(sim_scores, weights))
        return round(weighted_sum / sum(weights), 3)

    @staticmethod
    def _per_result_entries(
        sources: List[Dict[str, Any]],
        sim_scores: List[float],
    ) -> List[Dict[str, Any]]:
        """
        Pair each source with its similarity for the response payload.

        Titles are truncated to 60 characters; a missing or None title becomes
        an empty string. Pairing stops at the shorter of the two inputs.
        """
        return [
            {
                "title":      str(src.get("title") or "")[:60],
                "similarity": round(score, 3),
            }
            for src, score in zip(sources, sim_scores)
        ]

    def compute(
        self,
        query: str,
        sources: List[Dict[str, Any]],
        top_k: int = 5,
    ) -> Dict[str, Any]:
        """
        SLOW PATH — standalone use only.

        Encodes the query and the top_k sources from scratch, then delegates
        to compute_from_embeddings(). Each source is encoded as its title
        followed by the first 200 characters of its summary.

        Returns:
            The "no results" response when ``sources`` is empty; the
            "unavailable" response when the model cannot be obtained or
            encoding raises; otherwise the result of
            compute_from_embeddings().
        """
        if not sources:
            return self._no_results_response(query)

        model = self._get_model()
        if model is None:
            return self._unavailable_response()

        try:
            query_emb   = model.encode(query, convert_to_tensor=True)
            top_sources = sources[:top_k]
            # `or ''` keeps a None title/summary from being encoded as "None".
            texts = [
                f"{s.get('title') or ''} {(s.get('summary') or '')[:200]}"
                for s in top_sources
            ]
            doc_embs = model.encode(texts, convert_to_tensor=True)

            return self.compute_from_embeddings(
                query, top_sources, query_emb, doc_embs, top_k
            )

        except Exception as e:
            logger.error("Confidence scoring failed: %s", e)
            return self._unavailable_response()

    def _build_response(
        self,
        query: str,
        confidence: float,
        per_result: List[Dict],
    ) -> Dict[str, Any]:
        """
        Turn a numeric confidence into the labelled response dict.

        Label is "high" at or above _HIGH_CONFIDENCE, "medium" at or above
        _MEDIUM_CONFIDENCE, otherwise "low". Only "low" sets
        ``coverage_warning``. Suggestions are attached at every level: up to
        2 for "high", up to 3 otherwise.
        """
        # Always generate suggestions — useful at all confidence levels
        all_suggestions = self._suggest_queries(query)

        if confidence >= _HIGH_CONFIDENCE:
            label       = "high"
            warning     = False
            message     = None
            # Still provide 2 suggestions for query refinement even at high confidence
            suggestions = all_suggestions[:2]

        elif confidence >= _MEDIUM_CONFIDENCE:
            label       = "medium"
            warning     = False
            message     = (
                "Results partially match your query. "
                "Consider refining with more specific terms."
            )
            suggestions = all_suggestions[:3]

        else:
            label   = "low"
            warning = True
            message = (
                f"Low confidence ({confidence:.2f}) — results may not fully "
                "match your query intent. Try the suggested queries below."
            )
            suggestions = all_suggestions[:3]

        return {
            "confidence":              confidence,
            "confidence_label":        label,
            "coverage_warning":        warning,
            "warning_message":         message,
            "suggested_queries":       suggestions,
            "top_result_similarities": per_result,
        }

    # ------------------------------------------------------------
    # Domain-aware query refinement
    # ------------------------------------------------------------

    def _suggest_queries(self, query: str) -> List[str]:
        """
        Return up to 3 alternative queries for ``query``.

        Sources, in priority order:
          1. _QUERY_REFINEMENTS — first key found as a substring of the query.
          2. DOMAIN_QUERY_PATTERNS — only if step 1 found nothing; first
             keyword group with a keyword found as a substring of the query.
          3. _ACRONYM_EXPANSIONS — always appended; one suggestion per known
             acronym that appears as a whole word in the query.
          4. Generic templates — only if steps 1-3 produced nothing.

        Duplicates and suggestions equal to the (lowercased) query are
        dropped. An empty or whitespace-only query yields no suggestions.
        """
        query_lower = (query or "").lower().strip()
        if not query_lower:
            # Nothing to refine; avoids suggestions like " tutorial 2025".
            return []

        suggestions: List[str] = []

        # 1. Explicit query refinements (highest priority)
        for pattern, refinements in _QUERY_REFINEMENTS.items():
            if pattern in query_lower:
                suggestions.extend(refinements)
                break

        # 2. Domain-aware patterns
        if not suggestions:
            for domain_keywords, patterns in DOMAIN_QUERY_PATTERNS.items():
                if any(kw in query_lower for kw in domain_keywords):
                    topic = _extract_topic(query_lower, domain_keywords)
                    for pattern in patterns:
                        filled = pattern.format(topic=topic)
                        if filled not in suggestions:
                            suggestions.append(filled)
                    break

        # 3. Acronym expansion — replace whole words only, so an acronym that
        #    happens to be a substring of another word leaves that word intact.
        words = query_lower.split()
        for word in words:
            expanded = _ACRONYM_EXPANSIONS.get(word)
            if expanded is None:
                continue
            expanded_query = self._replace_token(words, word, expanded)
            if expanded_query not in suggestions:
                suggestions.append(expanded_query)

        # 4. Generic fallback only if nothing matched
        if not suggestions:
            suggestions = [
                f"{query_lower} tutorial 2025",
                f"{query_lower} implementation guide",
                f"introduction to {query_lower}",
            ]

        # De-duplicate in order and never echo the query itself back.
        seen   = {query_lower}
        unique = []
        for s in suggestions:
            if s not in seen:
                seen.add(s)
                unique.append(s)

        return unique[:3]

    @staticmethod
    def _replace_token(tokens: List[str], target: str, replacement: str) -> str:
        """Join ``tokens`` with spaces, swapping tokens equal to ``target`` for ``replacement``."""
        return " ".join(replacement if tok == target else tok for tok in tokens)

    def _no_results_response(self, query: str) -> Dict[str, Any]:
        """Response used when there are no sources: confidence 0.0, label "none", warning set."""
        return {
            "confidence":              0.0,
            "confidence_label":        "none",
            "coverage_warning":        True,
            "warning_message":         "No results returned for this query.",
            "suggested_queries":       self._suggest_queries(query),
            "top_result_similarities": [],
        }

    def _unavailable_response(self) -> Dict[str, Any]:
        """Response used when scoring could not run: confidence None, label "unavailable"."""
        return {
            "confidence":              None,
            "confidence_label":        "unavailable",
            "coverage_warning":        False,
            "warning_message":         None,
            "suggested_queries":       [],
            "top_result_similarities": [],
        }
    
def _extract_topic(query: str, domain_keywords: tuple) -> str:
    """Extract topic phrase by removing domain keyword from query."""
    for kw in domain_keywords:
        if kw in query:
            # Return query with the matched keyword stripped of stop words
            return query.strip()
    return query.strip()