"""
Query router: determines which domain(s) to search for a given query.
Uses a lightweight LLM call comparing the query against domain descriptions.
"""

import json
import litellm
from src.config import MODEL, DOCUMENT_REGISTRY
from src.usage import _extract_usage, _empty_usage


def _candidate_payloads(content: str) -> list[str]:
    """Return the substrings of a model reply that may hold the JSON array.

    Candidates are ordered from most literal to most lenient: the whole reply,
    the first markdown-fenced segment (with an optional ``json`` tag removed),
    and finally the span from the first ``[`` to the last ``]``.
    """
    payloads = [content]
    if "```" in content:
        fenced = content.split("```")[1]
        payloads.append(fenced.removeprefix("json").strip())
    start, end = content.find("["), content.rfind("]")
    if 0 <= start < end:
        payloads.append(content[start:end + 1])
    return payloads


def _parse_domain_keys(content: str) -> list:
    """Parse the model reply into a flat list of candidate domain keys.

    A bare JSON string is treated as a one-element array. Nested arrays and
    objects are dropped because they are unhashable and can never be registry
    keys.

    Raises:
        ValueError: if no candidate payload decodes to a JSON array or string.
    """
    for payload in _candidate_payloads(content):
        try:
            parsed = json.loads(payload)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, str):
            parsed = [parsed]
        if isinstance(parsed, list):
            return [item for item in parsed if not isinstance(item, (list, dict))]
    raise ValueError("no JSON array of domain keys found in router response")


def route_query(
    query: str,
    max_domains: int = 3,
    profession: str | None = None,
) -> tuple[list[str], dict]:
    """Determine which domain indexes to search for a given query.

    Builds a routing prompt from the entries of ``DOCUMENT_REGISTRY``, asks the
    model for a JSON array of domain keys, and keeps only keys that exist in
    the registry (order preserved, duplicates removed).

    Args:
        query: The user's question (in English)
        max_domains: Maximum number of domains to return on the success path.
            Values below 1 are treated as 1.
        profession: Optional user profession (e.g. "Chiropractor"). When set,
            included in the routing prompt as soft context so the LLM picks
            domains relevant to that profession's binding rules.

    Returns:
        Tuple of (list of domain keys, usage dict). When the LLM call fails,
        every registry key is returned together with an empty usage dict. When
        the call succeeds but the reply cannot be parsed or names no known
        domain, every registry key is returned together with the usage of the
        call that was actually made.
    """
    all_domains = list(DOCUMENT_REGISTRY.keys())
    # A non-positive limit would make the "1-N" instruction meaningless and a
    # negative one would slice from the wrong end of the result list.
    limit = max(1, max_domains)

    # Build domain description list using router-specific descriptions
    domain_list = "\n".join(
        f"- {key}: {info.get('router_description', info['description'])}"
        for key, info in DOCUMENT_REGISTRY.items()
    )

    profession_line = (
        f"\nThe user has stated their profession: **{profession}**. "
        f"When picking domains, consider that profession's specific binding "
        f"rules. Cross-cutting domains (medicines, advertising_standards, "
        f"consumer_protection, marketing_comms, practitioner_regulation) apply "
        f"regardless of profession; professional_codes is profession-specific."
        if profession
        else ""
    )

    prompt = f"""You route questions to the right document collections in an NZ healthcare marketing compliance system, scoped to complementary/alternative practitioners (chiropractors, osteopaths, physiotherapists, Chinese medicine practitioners, naturopaths, acupuncturists) and supplement sellers.

Think about what the user is trying to accomplish — not just which document mentions the keywords. Many real questions cut across multiple domains.{profession_line}

Available domains:
{domain_list}

Examples:
- "Can I include patient testimonials on my chiro practice website?" → ["advertising_standards", "professional_codes", "medicines_and_supplements"]
  (ASA testimonial rules + Chiropractic Board's own rules + s58 if any product is involved)
- "Can I claim my supplement reduces inflammation?" → ["medicines_and_supplements", "consumer_protection", "advertising_standards"]
  (Therapeutic claim risk reclassifying it as a medicine + s12A substantiation + ASA TAC)
- "Can I email my patient list a newsletter with treatment specials?" → ["marketing_comms"]
  (Privacy + HIPC + UEMA — the 'can I email this list?' cluster)
- "Can I call myself a 'specialist' in sports physio?" → ["practitioner_regulation", "professional_codes"]
  (HPCA Act title-use restrictions + Physio Board's own advertising standard)
- "Do I need evidence for the 'natural' claim on my product label?" → ["consumer_protection"]
  (s12A substantiation — 'natural' is a representation requiring reasonable basis)
- "What changes when the new ASA code takes effect?" → ["advertising_standards"]
  (Transition window — the December 2025 code applies from 1 April 2026)
- "Can I send appointment reminders by SMS without explicit consent?" → ["marketing_comms"]
  (UEMA + Privacy Act / HIPC depending on whether health info is involved)
- "What does the Chinese Medicine Council say about traditional-use claims?" → ["professional_codes", "advertising_standards"]
  (CMCNZ-specific rules + general ASA framework)

User question: {query}

Return a JSON array of 1-{limit} domain keys (most relevant first). Only include domains likely to contain relevant information. Many marketing-compliance questions hit 2-3 domains because the rules layer (general consumer law + therapeutic-specific rules + profession-specific rules).
Return ONLY the JSON array, nothing else."""

    # Stage 1: the LLM call. Any failure here means nothing usable was
    # returned, so report empty usage alongside the all-domains fallback.
    try:
        response = litellm.completion(
            model=MODEL,
            messages=[
                {"role": "system", "content": "Do not use thinking. Respond directly with the JSON only."},
                {"role": "user", "content": prompt},
            ],
            temperature=0,
            max_tokens=500,
        )
        usage = _extract_usage(response)
    except Exception as e:
        print(f"Router error: {e}. Falling back to all domains.")
        return all_domains, _empty_usage()

    # Stage 2: interpret the reply. The call has already been made, so the
    # extracted usage is kept even when the reply turns out to be unusable.
    try:
        content = (response.choices[0].message.content or "").strip()
        candidates = _parse_domain_keys(content)
    except Exception as e:
        print(f"Router error: {e}. Falling back to all domains.")
        return all_domains, usage

    # Validate against the registry; dict.fromkeys removes duplicates while
    # preserving the model's relevance ordering.
    valid_domains = list(
        dict.fromkeys(key for key in candidates if key in DOCUMENT_REGISTRY)
    )
    if not valid_domains:
        return all_domains, usage

    return valid_domains[:limit], usage