import operator
from typing import Annotated, Optional, TypedDict

from app.models.chat import SourceRef


class ChunkMetadata(TypedDict, total=False):
    """
    Per-chunk payload stored in Qdrant.

    All fields have total=False so optional fields can be added without
    migrating existing points. Callers must .get() with a default for safety.

    chunk_type is the canonical discriminator for how a point is used:
        "leaf"           — real portfolio content; the ONLY type shown to the user.
        "raptor_summary" — synthetic cluster summary; retrieval aid only.
        "question_proxy" — hypothetical question embedding; retrieval aid only.
    """

    doc_id: str
    source_title: str
    source_url: str  # Absolute URL or "" for resume. Never a relative path.
    section: str
    source_type: str  # "blog" | "project" | "resume" | "readme"
    date: str
    tags: list[str]

    # ── Canonical chunk_type discriminator ────────────────────────────────
    chunk_type: str  # "leaf" | "raptor_summary" | "question_proxy"

    # ── Leaf-only fields ──────────────────────────────────────────────────
    # Proper nouns + domain terms extracted by capitalisation heuristic at
    # ingestion time. Used for Qdrant keyword payload filter at query time so
    # canonical name variants ("XSilica", "XSILICA") all match.
    keywords: list[str]

    # ── Positional ordering field ─────────────────────────────────────────
    # 0-based position of this chunk within its source document, set at
    # ingestion time by heading_chunker. Used for ordered sibling expansion
    # at query time so retrieve.py can prefer adjacent chunks over arbitrary
    # doc members.
    chunk_index: int

    # ── raptor_summary-only fields ────────────────────────────────────────
    # Qdrant point UUIDs of the leaf chunks that were summarised to produce
    # this cluster node. Used at query time to expand relevant cluster hits
    # to their actual evidence passages.
    child_leaf_ids: list[str]

    # ── question_proxy-only fields ────────────────────────────────────────
    # Qdrant point UUID of the parent leaf chunk. Used at query time to
    # resolve a question_proxy hit to the underlying evidence passage.
    parent_leaf_id: str

    # ── Legacy fields (kept for old points that pre-date the new schema) ──
    raptor_level: int  # deprecated — use chunk_type instead
    linked_chunks: list[str]  # deprecated — replaced by child_leaf_ids


class Chunk(TypedDict):
    """A retrieved passage: raw text plus its Qdrant payload metadata."""

    text: str
    metadata: ChunkMetadata


class PipelineState(TypedDict):
    """
    Shared state threaded through the RAG pipeline graph.

    Fields annotated with ``Annotated[..., operator.add]`` are reducer
    channels: values emitted by concurrent/sequential nodes are concatenated
    rather than overwritten. All other fields are last-write-wins.
    """

    query: str
    query_complexity: str
    session_id: str
    query_embedding: Optional[list[float]]  # set by cache node, reused by retrieve
    expanded_queries: Annotated[list[str], operator.add]
    retrieved_chunks: Annotated[list[Chunk], operator.add]
    reranked_chunks: Annotated[list[Chunk], operator.add]
    answer: str
    sources: Annotated[list[SourceRef], operator.add]
    cached: bool
    cache_key: Optional[str]
    guard_passed: bool
    thinking: bool  # True while Gemini has signalled RAG is needed

    # Last N Q/A pairs for this session — injected into prompts for
    # follow-up context. (Element shape not pinned here — TODO confirm
    # against the session store before tightening the annotation.)
    conversation_history: list

    # Stage 2: rolling conversation summary (single paragraph, ≤150 tokens).
    # Injected into generate/gemini_fast instead of raw turn list when present.
    conversation_summary: Optional[str]

    # Stage 2: self-contained query rewritten before retrieval when the
    # original contains unresolved pronouns/references. Used for embedding;
    # original query is used for display and system prompt.
    decontextualized_query: Optional[str]

    # True when the current query explicitly criticises the previous answer.
    is_criticism: bool

    # Stage 1: True when submitted via a follow-up pill button.
    # Bypasses Gemini fast-path so pill follow-ups always produce cited
    # RAG answers.
    is_followup: bool

    latency_ms: int
    error: Optional[str]
    interaction_id: Optional[int]

    # CRAG: counts retrieve node invocations; 2 = one retry was attempted.
    retrieval_attempts: int
    rewritten_query: Optional[str]

    # Top cross-encoder score from the last retrieve call. Used by CRAG routing.
    top_rerank_score: Optional[float]

    follow_ups: list[str]

    # Which pipeline branch produced the final answer.
    path: Optional[str]

    # 1–3 word topic extracted from the query by the guard node.
    query_topic: Optional[str]

    # Stage 3: SELF-RAG critic scores (1–3 each). Logged to SQLite for training.
    critic_groundedness: Optional[int]  # all claims supported by a specific chunk
    critic_completeness: Optional[int]  # answer uses all relevant available chunks
    critic_specificity: Optional[int]  # answer contains specific names/numbers
    critic_quality: Optional[str]  # "high" | "medium" | "low"

    # Fix 1: Enumeration query classifier.
    # True when the query has enumeration intent ("list all projects",
    # "how many blogs"). The enumerate_query node skips semantic retrieval
    # and does a Qdrant payload-filter scroll instead, returning a complete
    # deduplicated title list. Logged to SQLite so enumeration turns can be
    # monitored separately from RAG turns.
    is_enumeration_query: bool

    # Bug 4: canonical name forms produced by expand_query (all casing
    # variants of proper nouns in the query). Fed into the BM25 query as a
    # union so the sparse component scores positively across "XSilica",
    # "XSILICA", "xsilica", etc.
    query_canonical_forms: list[str]

    # RC-13: retrieval diagnostics logged per turn.
    sibling_expansion_count: Optional[int]  # chunks added via sibling expansion
    focused_source_type: Optional[str]  # e.g. "cv", "project", "blog", None