""" Query router: determines which domain(s) to search for a given query. Uses a lightweight LLM call comparing the query against domain descriptions. """ import json import litellm from src.config import MODEL, DOCUMENT_REGISTRY from src.usage import _extract_usage, _empty_usage def route_query( query: str, max_domains: int = 3, profession: str | None = None, ) -> tuple[list[str], dict]: """Determine which domain indexes to search for a given query. Args: query: The user's question (in English) max_domains: Maximum number of domains to search profession: Optional user profession (e.g. "Chiropractor"). When set, included in the routing prompt as soft context so the LLM picks domains relevant to that profession's binding rules. Returns: Tuple of (list of domain keys, usage dict) """ # Build domain description list using router-specific descriptions domain_list = "\n".join( f"- {key}: {info.get('router_description', info['description'])}" for key, info in DOCUMENT_REGISTRY.items() ) profession_line = ( f"\nThe user has stated their profession: **{profession}**. " f"When picking domains, consider that profession's specific binding " f"rules. Cross-cutting domains (medicines, advertising_standards, " f"consumer_protection, marketing_comms, practitioner_regulation) apply " f"regardless of profession; professional_codes is profession-specific." if profession else "" ) prompt = f"""You route questions to the right document collections in an NZ healthcare marketing compliance system, scoped to complementary/alternative practitioners (chiropractors, osteopaths, physiotherapists, Chinese medicine practitioners, naturopaths, acupuncturists) and supplement sellers. Think about what the user is trying to accomplish — not just which document mentions the keywords. Many real questions cut across multiple domains.{profession_line} Available domains: {domain_list} Examples: - "Can I include patient testimonials on my chiro practice website?" → ["advertising_standards", "professional_codes", "medicines_and_supplements"] (ASA testimonial rules + Chiropractic Board's own rules + s58 if any product is involved) - "Can I claim my supplement reduces inflammation?" → ["medicines_and_supplements", "consumer_protection", "advertising_standards"] (Therapeutic claim risk reclassifying it as a medicine + s12A substantiation + ASA TAC) - "Can I email my patient list a newsletter with treatment specials?" → ["marketing_comms"] (Privacy + HIPC + UEMA — the 'can I email this list?' cluster) - "Can I call myself a 'specialist' in sports physio?" → ["practitioner_regulation", "professional_codes"] (HPCA Act title-use restrictions + Physio Board's own advertising standard) - "Do I need evidence for the 'natural' claim on my product label?" → ["consumer_protection"] (s12A substantiation — 'natural' is a representation requiring reasonable basis) - "What changes when the new ASA code takes effect?" → ["advertising_standards"] (Transition window — the December 2025 code applies from 1 April 2026) - "Can I send appointment reminders by SMS without explicit consent?" → ["marketing_comms"] (UEMA + Privacy Act / HIPC depending on whether health info is involved) - "What does the Chinese Medicine Council say about traditional-use claims?" → ["professional_codes", "advertising_standards"] (CMCNZ-specific rules + general ASA framework) User question: {query} Return a JSON array of 1-{max_domains} domain keys (most relevant first). Only include domains likely to contain relevant information. Many marketing-compliance questions hit 2-3 domains because the rules layer (general consumer law + therapeutic-specific rules + profession-specific rules). Return ONLY the JSON array, nothing else.""" try: response = litellm.completion( model=MODEL, messages=[ {"role": "system", "content": "Do not use thinking. Respond directly with the JSON only."}, {"role": "user", "content": prompt}, ], temperature=0, max_tokens=500, ) usage = _extract_usage(response) content = (response.choices[0].message.content or "").strip() # Parse JSON array from response # Handle cases where model wraps in markdown code blocks if "```" in content: content = content.split("```")[1] if content.startswith("json"): content = content[4:] content = content.strip() domains = json.loads(content) # Validate domain keys valid_domains = [d for d in domains if d in DOCUMENT_REGISTRY] if not valid_domains: return list(DOCUMENT_REGISTRY.keys()), usage return valid_domains[:max_domains], usage except Exception as e: print(f"Router error: {e}. Falling back to all domains.") return list(DOCUMENT_REGISTRY.keys()), _empty_usage()