"""
GCAS Search Engine – Query Processor
======================================

The single entry-point that orchestrates the full pre-search pipeline:

  raw query (English / Hindi / Gujarati, possibly with ASR errors)
      │
      ▼  1. normalizer.process_query()
      │     • detect language
      │     • transliterate Indic script → Latin
      │     • resolve city/university/ASR aliases
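      │     • expand abbreviations
      │
      ▼  1b. fuzzy_matcher.resolve_college_in_query()
      │     • resolve college search keywords to canonical college names
      │       (e.g. "MN" → "M. N. COLLEGE, VISNAGAR")
      │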
      ▼  2. fuzzy_matcher.analyze_entities()
      │     • find entity spans (college, district, program …)
      │     • multi-layer matching: exact → fuzzy → phonetic
      │
      ▼  3. fuzzy_matcher.rewrite_query_with_entities()
      │     • replace near-miss spans with canonical entity names
      │     • produce final embedding-ready query string
      │
      ▼  4. (after search) fuzzy_matcher.assess_confidence()
             • combine FAISS score + entity match quality
             • produce confidence_level + did_you_mean suggestions
"""
from __future__ import annotations

import logging
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

from normalizer import process_query as normalize_query
from fuzzy_matcher import (
    EntityMatch,
    analyze_entities,
    assess_confidence,
    generate_did_you_mean,
    resolve_college_in_query,
    rewrite_query_with_entities,
)

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Pre-search result
# ---------------------------------------------------------------------------

@dataclass
class ProcessedQuery:
    """
    Result of the pre-search pipeline for one raw query.

    Attributes
    ----------
    final_query            : cleaned, entity-corrected string to pass to
                             the embedder
    original_query         : the raw query exactly as it was received
    detected_language      : "en" | "hi" | "gu" | "unknown"
    corrected_query        : query text after alias/abbreviation resolution,
                             i.e. before the entity rewrite
    detected_entities      : entity matches found in the query
                             (defaults to a fresh empty list)
    normalization_metadata : per-step intermediate values from normalisation,
                             useful for debugging
                             (defaults to a fresh empty dict)
    """

    final_query: str
    original_query: str
    detected_language: str
    corrected_query: str
    detected_entities: List[EntityMatch] = field(default_factory=list)
    normalization_metadata: Dict[str, Any] = field(default_factory=dict)


# ---------------------------------------------------------------------------
# Post-search result enrichment
# ---------------------------------------------------------------------------

@dataclass
class SearchAnalysis:
    """Confidence + "did you mean" analysis produced after FAISS returns."""

    confidence_level: str         # "high" | "medium" | "low"
    adjusted_score:   float       # 0-1 score after entity boost
    did_you_mean:     List[str]   # human-readable suggestions
    # One dict per entity whose query span was actually corrected, with keys:
    #   original_span, corrected_to, entity_type, match_score, method
    # The values are not all text (match_score carries the matcher's score,
    # presumably numeric), hence Dict[str, Any] rather than Dict[str, str].
    entity_corrections: List[Dict[str, Any]]


# ---------------------------------------------------------------------------
# Pre-search pipeline
# ---------------------------------------------------------------------------

def process(raw_query: str) -> ProcessedQuery:
    """
    Turn a raw user query into an embedding-ready ``ProcessedQuery``.

    The stages run in a fixed order because each one consumes the text
    produced by the previous one: normalise → resolve college keywords →
    detect entities → rewrite with canonical entity names.

    Parameters
    ----------
    raw_query : str  – raw input from user (voice transcript or chat text)

    Returns
    -------
    ProcessedQuery  – `final_query` is the string for the embedding model;
                      the other fields record how it was derived
    """
    # Stage 1: language normalisation. Yields the normalised text, the
    # detected language and the normaliser's metadata.
    normalized_text, language, metadata = normalize_query(raw_query)

    # Stage 1b: college keyword resolution
    # (CollegeNameSearchKeyword1-4 data: "MN" → "M. N. COLLEGE, VISNAGAR").
    # Runs ahead of geographic matching so the college name is already in
    # place when that stage sees the text.
    resolved_text, college_matches = resolve_college_in_query(normalized_text)

    # Stage 2: entity detection on the resolved text (districts/talukas only).
    # College matches are kept first in the combined list.
    geo_matches = analyze_entities(resolved_text)
    all_matches = college_matches + geo_matches

    if all_matches:
        match_summary = []
        for match in all_matches:
            match_summary.append((match.query_span, match.matched, match.score))
        logger.debug(
            "Detected %d entity matches: %s",
            len(all_matches),
            match_summary,
        )

    # Stage 3: substitute canonical entity names into the query. A blank
    # rewrite is not expected; if it happens, keep the resolved text instead.
    rewritten = rewrite_query_with_entities(resolved_text, all_matches)
    embed_text = rewritten if rewritten.strip() else resolved_text

    logger.info(
        "Query processed | lang=%s | raw=%r | final=%r | entities=%d",
        language, raw_query, embed_text, len(all_matches),
    )

    return ProcessedQuery(
        final_query=embed_text,
        original_query=raw_query,
        detected_language=language,
        corrected_query=resolved_text,
        detected_entities=all_matches,
        normalization_metadata=metadata,
    )


# ---------------------------------------------------------------------------
# Post-search analysis
# ---------------------------------------------------------------------------

def analyze_results(
    processed: ProcessedQuery,
    top_results: List[Dict[str, Any]],
) -> SearchAnalysis:
    """
    After FAISS + LLM reranking, assess result quality and generate
    "did you mean?" suggestions when confidence is low or medium.

    Parameters
    ----------
    processed    : ProcessedQuery from the pre-search step
    top_results  : list of result dicts from search_engine.search();
                   each dict's "score" entry is read, and a missing or
                   None score counts as 0.0

    Returns
    -------
    SearchAnalysis  – `did_you_mean` is empty when confidence is "high";
                      `entity_corrections` lists only the entities whose
                      query span differs from the matched name
    """
    # Best score across the results; 0.0 when there are no results.
    # A result whose "score" is present but None is treated like a missing
    # score, so one incomplete result cannot make max() raise TypeError or
    # hand None to assess_confidence().
    raw_scores = (r.get("score") for r in top_results)
    top_score = max(
        (0.0 if score is None else score for score in raw_scores),
        default=0.0,
    )

    confidence_level, adjusted_score = assess_confidence(
        top_faiss_score=top_score,
        entities=processed.detected_entities,
    )

    # Suggestions are only worth computing when the match is not confident.
    did_you_mean: List[str] = []
    if confidence_level in ("low", "medium"):
        did_you_mean = generate_did_you_mean(
            query=processed.final_query,
            entities=processed.detected_entities,
            top_faiss_score=top_score,
            top_results=top_results,
        )

    # Summarise entity corrections for the API response
    entity_corrections = [
        {
            "original_span": e.query_span,
            "corrected_to":  e.matched,
            "entity_type":   e.entity_type,
            "match_score":   e.score,
            "method":        e.method,
        }
        for e in processed.detected_entities
        if e.query_span.lower() != e.matched.lower()  # only show actual corrections
    ]

    return SearchAnalysis(
        confidence_level=confidence_level,
        adjusted_score=adjusted_score,
        did_you_mean=did_you_mean,
        entity_corrections=entity_corrections,
    )