Spaces:
Sleeping
Sleeping
| """ | |
| Query router: determines which domain(s) to search for a given query. | |
| Uses a lightweight LLM call comparing the query against domain descriptions. | |
| """ | |
| import json | |
| import litellm | |
| from src.config import MODEL, DOCUMENT_REGISTRY | |
| from src.usage import _extract_usage, _empty_usage | |
def _build_routing_prompt(query: str, max_domains: int, profession: str | None) -> str:
    """Assemble the routing prompt that is sent to the LLM.

    Args:
        query: The user's question.
        max_domains: Upper bound quoted to the model in the "1-N" instruction.
        profession: Optional profession; when truthy, a soft-context sentence
            naming it is appended to the prompt's framing paragraph.

    Returns:
        The complete prompt text.
    """
    # Prefer the router-specific description. Fall back to the general one
    # lazily, so an entry that only defines 'router_description' does not
    # raise KeyError.
    domain_list = "\n".join(
        f"- {key}: {info.get('router_description') or info.get('description', '')}"
        for key, info in DOCUMENT_REGISTRY.items()
    )

    profession_line = (
        f"\nThe user has stated their profession: **{profession}**. "
        f"When picking domains, consider that profession's specific binding "
        f"rules. Cross-cutting domains (medicines, advertising_standards, "
        f"consumer_protection, marketing_comms, practitioner_regulation) apply "
        f"regardless of profession; professional_codes is profession-specific."
        if profession
        else ""
    )

    # NOTE: the arrows and em dashes below replace mis-decoded characters that
    # had crept into the prompt text.
    return f"""You route questions to the right document collections in an NZ healthcare marketing compliance system, scoped to complementary/alternative practitioners (chiropractors, osteopaths, physiotherapists, Chinese medicine practitioners, naturopaths, acupuncturists) and supplement sellers.
Think about what the user is trying to accomplish — not just which document mentions the keywords. Many real questions cut across multiple domains.{profession_line}
Available domains:
{domain_list}
Examples:
- "Can I include patient testimonials on my chiro practice website?" → ["advertising_standards", "professional_codes", "medicines_and_supplements"]
(ASA testimonial rules + Chiropractic Board's own rules + s58 if any product is involved)
- "Can I claim my supplement reduces inflammation?" → ["medicines_and_supplements", "consumer_protection", "advertising_standards"]
(Therapeutic claim risk reclassifying it as a medicine + s12A substantiation + ASA TAC)
- "Can I email my patient list a newsletter with treatment specials?" → ["marketing_comms"]
(Privacy + HIPC + UEMA — the 'can I email this list?' cluster)
- "Can I call myself a 'specialist' in sports physio?" → ["practitioner_regulation", "professional_codes"]
(HPCA Act title-use restrictions + Physio Board's own advertising standard)
- "Do I need evidence for the 'natural' claim on my product label?" → ["consumer_protection"]
(s12A substantiation — 'natural' is a representation requiring reasonable basis)
- "What changes when the new ASA code takes effect?" → ["advertising_standards"]
(Transition window — the December 2025 code applies from 1 April 2026)
- "Can I send appointment reminders by SMS without explicit consent?" → ["marketing_comms"]
(UEMA + Privacy Act / HIPC depending on whether health info is involved)
- "What does the Chinese Medicine Council say about traditional-use claims?" → ["professional_codes", "advertising_standards"]
(CMCNZ-specific rules + general ASA framework)
User question: {query}
Return a JSON array of 1-{max_domains} domain keys (most relevant first). Only include domains likely to contain relevant information. Many marketing-compliance questions hit 2-3 domains because the rules layer (general consumer law + therapeutic-specific rules + profession-specific rules).
Return ONLY the JSON array, nothing else."""


def _parse_routed_domains(content: str, limit: int) -> list[str]:
    """Extract known domain keys from the model's raw reply.

    Args:
        content: Raw text returned by the model.
        limit: Maximum number of keys to return.

    Returns:
        Keys present in DOCUMENT_REGISTRY, in the model's order, without
        duplicates, truncated to ``limit``. Empty when nothing matched.

    Raises:
        json.JSONDecodeError: If the payload is not valid JSON.
    """
    payload = content.strip()

    # The model sometimes wraps its answer in a Markdown code fence; keep only
    # the text inside the first fence and drop a leading "json" language tag.
    if "```" in payload:
        payload = payload.split("```")[1]
        if payload.startswith("json"):
            payload = payload[4:]
        payload = payload.strip()

    parsed = json.loads(payload)

    if isinstance(parsed, str):
        # A bare JSON string is one domain key, not a sequence of characters.
        parsed = [parsed]
    elif not isinstance(parsed, (list, dict)):
        # Numbers, booleans and null carry no domain keys.
        return []

    # The isinstance check keeps unhashable items (nested lists/objects) away
    # from the registry lookup; dict.fromkeys de-duplicates while preserving
    # order. Iterating a JSON object yields its keys.
    matched = dict.fromkeys(
        item for item in parsed
        if isinstance(item, str) and item in DOCUMENT_REGISTRY
    )
    return list(matched)[:limit]


def route_query(
    query: str,
    max_domains: int = 3,
    profession: str | None = None,
) -> tuple[list[str], dict]:
    """Determine which domain indexes to search for a given query.

    Makes one LLM call that compares the query with the domain descriptions in
    DOCUMENT_REGISTRY and asks for a JSON array of domain keys.

    Args:
        query: The user's question (in English).
        max_domains: Maximum number of domains to search. Values below 1 are
            treated as 1.
        profession: Optional user profession (e.g. "Chiropractor"). When set,
            it is included in the routing prompt as soft context so the LLM
            picks domains relevant to that profession's binding rules.

    Returns:
        Tuple of (list of domain keys, usage dict). If the call or the parsing
        fails, or the model names no known domain, every registry key is
        returned (this fallback is not capped by ``max_domains``). The usage
        dict reflects the LLM call whenever a response was received, including
        when its content could not be parsed.
    """
    limit = max(1, max_domains)
    prompt = _build_routing_prompt(query, limit, profession)

    # Start from an empty usage record so a failure before any response still
    # returns a well-formed value. Assumes _empty_usage() is a cheap
    # constructor -- TODO confirm.
    usage = _empty_usage()

    try:
        response = litellm.completion(
            model=MODEL,
            messages=[
                {"role": "system", "content": "Do not use thinking. Respond directly with the JSON only."},
                {"role": "user", "content": prompt},
            ],
            temperature=0,
            max_tokens=500,
        )
        # Record usage before parsing, so the except path below still reports
        # the tokens this call consumed.
        usage = _extract_usage(response)
        content = response.choices[0].message.content or ""
        domains = _parse_routed_domains(content, limit)
    except Exception as e:
        # Deliberate best-effort boundary: routing must never break the query
        # pipeline, so any failure degrades to searching every domain.
        print(f"Router error: {e}. Falling back to all domains.")
        return list(DOCUMENT_REGISTRY.keys()), usage

    if not domains:
        return list(DOCUMENT_REGISTRY.keys()), usage
    return domains, usage