"""LLM-based gap analyzer — semantically detects missing design information. Uses a structured LLM call to analyze agent responses against the current DesignState and generate contextual question cards for the UI. """ from __future__ import annotations import logging from typing import TYPE_CHECKING, Literal from pydantic import BaseModel, Field if TYPE_CHECKING: from agents.agent_flow import AgentResponse from agents.design_state import DesignState logger = logging.getLogger(__name__) # ── Models ──────────────────────────────────────────────────────────────────── class DetectedGap(BaseModel): """A single piece of missing information identified by the LLM.""" category: str description: str severity: Literal["blocking", "recommended", "nice_to_have"] class GeneratedQuestionCard(BaseModel): """A contextual question card for the UI, generated by the LLM.""" category: str question: str responsible_agent: str agent_name: str = "" agent_color: str = "" severity: Literal["blocking", "recommended", "nice_to_have"] = "recommended" suggestions: list[str] = Field(default_factory=list) allow_custom: bool = True class GapAnalysisResult(BaseModel): """Top-level structured response from the gap analysis LLM call.""" has_gaps: bool = False gaps: list[DetectedGap] = Field(default_factory=list) question_cards: list[GeneratedQuestionCard] = Field(default_factory=list) from config.settings import settings from agents.definitions import AGENTS # ── System prompt ───────────────────────────────────────────────────────────── _SYSTEM_PROMPT = """\ You are a design gap analyzer for a CNC part design system. Given the current design state, agent responses, and user message, identify what information is still missing to produce a complete CNC-ready design. Rules: - Do NOT flag gaps for information already present in the design state. - Do NOT flag gaps for information the user just provided in their message. - Do NOT ask about the part name — the system derives it automatically. - Do NOT repeat questions that were already asked in previous turns. - Invent descriptive category names (e.g. "bolt_pattern", "thermal_rating", "load_capacity") — there is no fixed set. - Use snake_case for category names. - Severity: "blocking" = cannot proceed without it, "recommended" = should have for quality, "nice_to_have" = would improve but not essential. - Generate questions that reference the specific part being designed. - Suggestions should be concrete values appropriate for the part context. - Set responsible_agent to the agent best suited to handle the answer: "design", "engineering", "cnc", "cad", or "cam". - If no gaps exist, return has_gaps: false with empty lists. - Set severity on each question card matching the gap it addresses. Question abstraction level — CRITICAL: - Ask questions a normal person can answer, NOT questions requiring CAD or engineering expertise. The user describes INTENT; the agents derive the technical implementation. - NEVER ask for exact coordinates, precise radii, specific tolerance values, G-code parameters, or CadQuery implementation details. - GOOD: "Where should the mounting holes go?" with suggestions like "one in each corner", "evenly spaced along the edges", "centered on top". - BAD: "What are the precise X, Y, Z coordinates for the M4 mounting holes?" - GOOD: "Should the edges be sharp or rounded?" - BAD: "What fillet radius should be applied to the internal pocket edges?" - GOOD: "How thick should the walls be?" with suggestions like "thin (1-2mm)", "standard (3-5mm)", "heavy-duty (6mm+)". - BAD: "What is the minimum wall thickness in mm for the vertical ribs?" - Frame questions in everyday language. Offer 2-4 plain-language suggestions that map to concrete engineering values behind the scenes. - Technical details (coordinates, exact radii, tolerance classes, feed rates) are the agents' job to determine from the user's high-level answers. """ # ── LLM call ────────────────────────────────────────────────────────────────── def _build_llm(): """Build a crewai.LLM instance from gap analysis config.""" from crewai import LLM cfg = settings.gap_analysis return LLM(model=cfg.model, temperature=cfg.temperature, max_tokens=cfg.max_tokens) def _build_prompt( agent_responses: list[AgentResponse], state: DesignState, user_message: str, ) -> str: """Build the user-message prompt with state, responses, and user message.""" parts = [ "## Design State", state.model_dump_json(indent=2), "", "## Agent Responses", ] for resp in agent_responses: parts.append(f"### {resp.agent_id}") parts.append(resp.message) parts.append("") parts.append("## User Message") parts.append(user_message or "(none)") return "\n".join(parts) def _enrich_agent_metadata(result: GapAnalysisResult) -> None: """Populate agent_name and agent_color on each question card from AGENTS dict.""" for card in result.question_cards: agent_def = AGENTS.get(card.responsible_agent) if agent_def: card.agent_name = agent_def.name card.agent_color = agent_def.color else: card.agent_name = card.responsible_agent.title() card.agent_color = "#888888" def analyze_gaps( agent_responses: list[AgentResponse], state: DesignState, user_message: str = "", ) -> GapAnalysisResult: """Analyze agent responses for missing design information using an LLM. Returns a GapAnalysisResult with detected gaps and contextual question cards. On any failure, returns an empty result (gap analysis is non-blocking). """ try: llm = _build_llm() prompt = _build_prompt(agent_responses, state, user_message) messages = [ {"role": "system", "content": _SYSTEM_PROMPT}, {"role": "user", "content": prompt}, ] result = llm.call(messages, response_model=GapAnalysisResult) if not isinstance(result, GapAnalysisResult): logger.debug("Gap analysis LLM returned non-model response, parsing as JSON") result = GapAnalysisResult.model_validate_json(result) _enrich_agent_metadata(result) return result except Exception: logger.warning("Gap analysis LLM call failed", exc_info=True) return GapAnalysisResult()