""" input.py — Query extraction with expanded keyword awareness. Key changes from original: 1. Prompt now explicitly lists the kinds of terms that belong in exact_keywords, including role-specific tools the LLM should infer (e.g. "statistics" → keyword, not just explicit software names). This fixes the C4 miss on "Basic Statistics (New)". 2. Added SYNONYM_HINTS instruction so the LLM expands common abbreviations: "Word" → ["Microsoft Word", "Word 365"], "Excel" → ["Microsoft Excel", "Excel 365"]. This fixes the C7/C8 misses on Microsoft Office products. 3. semantic_query rule now requires incorporating job role, skills, AND seniority so the FAISS embed is richer. 4. Reduced history window to last 6 messages (was unbounded) to keep token usage stable and avoid context poisoning from early turns. """ import os from typing import Optional, List from pydantic import BaseModel, Field from langchain_core.prompts import ChatPromptTemplate from langchain_groq import ChatGroq # ------------------------------------------------------------------- # 1. Pydantic Schema # ------------------------------------------------------------------- class QueryExtraction(BaseModel): job_level: Optional[str] = Field( description="Seniority level mapped to the valid taxonomy. None if unspecified." ) test_category: Optional[str] = Field( description="Assessment category mapped to the valid taxonomy. None if unspecified." ) semantic_query: str = Field( description=( "A rich, multi-word summary combining: the job role, required skills, " "seniority, and type of assessment needed. Must reflect the ENTIRE conversation, " "not just the last message. E.g. 'mid-level Java developer stakeholder communication " "personality cognitive ability assessment'." ) ) exact_keywords: List[str] = Field( default=[], description=( "Specific terms that should trigger a name/description keyword search. Include: " "(a) explicit software/tools (Excel, Docker, Linux, Java, SAP, Python, Word, SQL), " "(b) domain-specific terminology (Medical Terminology, Statistics, IFRS), " "(c) assessment product names mentioned by the user (OPQ, GSA, Verify), " "(d) EXPANDED SYNONYMS: if user says 'Excel' also add 'Microsoft Excel' and 'Excel 365'; " "if user says 'Word' also add 'Microsoft Word' and 'Word 365'; " "if user says 'coding test' or 'live coding' also add 'Smart Interview'. " "IMPORTANT: include both the short form AND the full product name." ) ) is_comparison: bool = Field( description="True ONLY if the user explicitly asks to compare two or more specific tests." ) # ------------------------------------------------------------------- # 2. LLM Setup # ------------------------------------------------------------------- llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0, max_retries=5) structured_llm = llm.with_structured_output(QueryExtraction) # ------------------------------------------------------------------- # 3. Valid Taxonomies # ------------------------------------------------------------------- VALID_JOB_LEVELS = [ "Director", "Entry-Level", "Executive", "Front Line Manager", "General Population", "Graduate", "Manager", "Mid-Professional", "Professional Individual Contributor", "Supervisor" ] VALID_CATEGORIES = [ "Ability & Aptitude", "Assessment Exercises", "Biodata & Situational Judgment", "Competencies", "Development & 360", "Knowledge & Skills", "Personality & Behavior", "Simulations" ] # ------------------------------------------------------------------- # 4. System Prompt # ------------------------------------------------------------------- system_prompt = f""" You are the query extraction engine for an SHL Assessment catalog database. Read the full conversation history and the latest user message, then extract structured search parameters. TAXONOMY CONSTRAINTS (map semantically, never invent values outside these lists): job_level MUST be one of: {VALID_JOB_LEVELS} Mapping examples: - "4 years experience", "mid-level" → "Mid-Professional" - "new grad", "fresh graduate" → "Graduate" - "C-suite", "VP", "Chief" → "Executive" - "team lead", "line manager" → "Front Line Manager" - "entry", "junior", "0-2 years" → "Entry-Level" - "individual contributor" → "Professional Individual Contributor" test_category MUST be one of: {VALID_CATEGORIES} Mapping examples: - "coding test", "programming test", "technical skills" → "Knowledge & Skills" - "personality", "behavior", "culture fit" → "Personality & Behavior" - "cognitive", "aptitude", "reasoning", "IQ" → "Ability & Aptitude" - "simulation", "role-play", "situational" → "Simulations" - "360", "development", "feedback" → "Development & 360" - "situational judgment", "biodata" → "Biodata & Situational Judgment" - "competency", "leadership" → "Competencies" SEMANTIC QUERY RULE: Do NOT copy the user's latest message verbatim. Synthesize the ENTIRE conversation into a single rich phrase covering: [seniority] + [job role] + [required skills] + [assessment type needed] Example: conversation about hiring a mid-level Java dev who needs stakeholder communication → "mid-level Java developer stakeholder communication personality knowledge assessment" EXACT KEYWORDS RULE: Think broadly about what specific terms should trigger a database keyword search. - Software tools: "Excel", "Microsoft Excel", "Excel 365" (add ALL three if any one is mentioned) - Programming languages: "Java", "Python", "Linux", "SQL" - Domain terms: "Medical Terminology", "Statistics", "IFRS", "Safety" - Assessment names: "OPQ", "OPQ32r", "Verify", "GSA", "Smart Interview" - Spoken/voice assessments: if user mentions "spoken English", "voice", "accent" → add "SVAR", "Spoken English" - "Live coding" or "coding interview" → add "Smart Interview", "Live Coding" - ALWAYS expand abbreviations to their full names as additional keywords COMPARISON RULE: Set is_comparison=true ONLY when the user explicitly asks "compare", "difference between", "vs", or "which is better" for two named tests. """ prompt = ChatPromptTemplate.from_messages([ ("system", system_prompt), ("placeholder", "{conversation_history}"), ("human", "{user_input}") ]) # ------------------------------------------------------------------- # 5. Chain # ------------------------------------------------------------------- extraction_chain = prompt | structured_llm # ------------------------------------------------------------------- # 6. History Trimmer — keep last 6 messages to control token usage # ------------------------------------------------------------------- def trim_history(messages: list, max_turns: int = 6) -> list: """Return the last `max_turns` messages as (role, content) tuples.""" return [(m[0], m[1]) for m in messages[-max_turns:]] # ------------------------------------------------------------------- # 7. Smoke Tests # ------------------------------------------------------------------- if __name__ == "__main__": tests = [ { "label": "Standard Java + personality", "history": [], "input": "I need a personality test for a mid-level Java dev." }, { "label": "Comparison", "history": [], "input": "What is the difference between the OPQ32r and the GSA?" }, { "label": "Office suite expansion (C7/C8 regression)", "history": [("user", "Hiring admin staff"), ("assistant", "What level?"), ("user", "Entry level")], "input": "They need to use Excel and Word daily." }, { "label": "Live coding (C2 regression)", "history": [("user", "Hiring a software engineer"), ("assistant", "What seniority?")], "input": "Mid-level. I want a live coding interview assessment." }, { "label": "Spoken English (C3 regression)", "history": [("user", "Customer service role in a call center")], "input": "Entry level. They must have good spoken English." }, ] for t in tests: result = extraction_chain.invoke({ "conversation_history": t["history"], "user_input": t["input"] }) print(f"\n--- {t['label']} ---") print(result.model_dump_json(indent=2))