Spaces:
Sleeping
Sleeping
Daniel Tu
feat: constrain agent questions to user-friendly abstraction level (#13)
afd1605 unverified | """LLM-based gap analyzer — semantically detects missing design information. | |
| Uses a structured LLM call to analyze agent responses against the current | |
| DesignState and generate contextual question cards for the UI. | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| from typing import TYPE_CHECKING, Literal | |
| from pydantic import BaseModel, Field | |
| if TYPE_CHECKING: | |
| from agents.agent_flow import AgentResponse | |
| from agents.design_state import DesignState | |
| logger = logging.getLogger(__name__) | |
| # ── Models ──────────────────────────────────────────────────────────────────── | |
class DetectedGap(BaseModel):
    """A single piece of missing information identified by the LLM."""

    # NOTE: instances are produced by the LLM as part of GapAnalysisResult,
    # which analyze_gaps passes to the LLM call as its response_model.

    # Free-form label for the kind of missing information. The system prompt
    # asks for invented snake_case names (e.g. "bolt_pattern"); there is no
    # fixed set of categories.
    category: str
    # Text explaining what is missing (free text produced by the LLM).
    description: str
    # How important the gap is. Per the system prompt: "blocking" = cannot
    # proceed without it, "recommended" = should have for quality,
    # "nice_to_have" = would improve but not essential.
    severity: Literal["blocking", "recommended", "nice_to_have"]
class GeneratedQuestionCard(BaseModel):
    """A contextual question card for the UI, generated by the LLM."""

    # Free-form snake_case label; the system prompt asks the LLM to invent
    # descriptive names rather than pick from a fixed set.
    category: str
    # The question text. The system prompt requires everyday language that
    # captures user intent rather than exact engineering values.
    question: str
    # Agent that should handle the answer. The system prompt lists "design",
    # "engineering", "cnc", "cad", and "cam"; the value is used as the lookup
    # key into AGENTS by _enrich_agent_metadata.
    responsible_agent: str
    # Display name and color for the responsible agent. Both default to empty
    # strings and are filled in by _enrich_agent_metadata after the LLM call.
    agent_name: str = ""
    agent_color: str = ""
    # Same scale as DetectedGap.severity; the system prompt asks for it to
    # match the gap the card addresses. Defaults to "recommended".
    severity: Literal["blocking", "recommended", "nice_to_have"] = "recommended"
    # Suggested answers; the system prompt asks for 2-4 plain-language options.
    # Defaults to an empty list.
    suggestions: list[str] = Field(default_factory=list)
    # NOTE(review): presumably controls whether the UI accepts a free-form
    # answer in addition to the suggestions — confirm against the UI code.
    allow_custom: bool = True
class GapAnalysisResult(BaseModel):
    """Top-level structured response from the gap analysis LLM call."""

    # NOTE: analyze_gaps passes this class as response_model to the LLM call
    # and returns a default (empty) instance when that call or parsing fails.

    # Flag set by the LLM; the system prompt instructs it to return false
    # together with empty lists when nothing is missing. Nothing in this
    # module recomputes it from the lists below.
    has_gaps: bool = False
    # Missing pieces of information detected by the LLM.
    gaps: list[DetectedGap] = Field(default_factory=list)
    # Question cards for the UI; agent_name / agent_color on each card are
    # filled in afterwards by _enrich_agent_metadata.
    question_cards: list[GeneratedQuestionCard] = Field(default_factory=list)
| from config.settings import settings | |
| from agents.definitions import AGENTS | |
| # ── System prompt ───────────────────────────────────────────────────────────── | |
# Sent verbatim as the "system" message in analyze_gaps. It is runtime text:
# any edit here changes what the LLM is told, so treat wording changes as
# behavior changes.
_SYSTEM_PROMPT = """\
You are a design gap analyzer for a CNC part design system.
Given the current design state, agent responses, and user message, identify
what information is still missing to produce a complete CNC-ready design.
Rules:
- Do NOT flag gaps for information already present in the design state.
- Do NOT flag gaps for information the user just provided in their message.
- Do NOT ask about the part name — the system derives it automatically.
- Do NOT repeat questions that were already asked in previous turns.
- Invent descriptive category names (e.g. "bolt_pattern", "thermal_rating",
"load_capacity") — there is no fixed set.
- Use snake_case for category names.
- Severity: "blocking" = cannot proceed without it, "recommended" = should
have for quality, "nice_to_have" = would improve but not essential.
- Generate questions that reference the specific part being designed.
- Suggestions should be concrete values appropriate for the part context.
- Set responsible_agent to the agent best suited to handle the answer:
"design", "engineering", "cnc", "cad", or "cam".
- If no gaps exist, return has_gaps: false with empty lists.
- Set severity on each question card matching the gap it addresses.
Question abstraction level — CRITICAL:
- Ask questions a normal person can answer, NOT questions requiring CAD or
engineering expertise. The user describes INTENT; the agents derive the
technical implementation.
- NEVER ask for exact coordinates, precise radii, specific tolerance values,
G-code parameters, or CadQuery implementation details.
- GOOD: "Where should the mounting holes go?" with suggestions like
"one in each corner", "evenly spaced along the edges", "centered on top".
- BAD: "What are the precise X, Y, Z coordinates for the M4 mounting holes?"
- GOOD: "Should the edges be sharp or rounded?"
- BAD: "What fillet radius should be applied to the internal pocket edges?"
- GOOD: "How thick should the walls be?" with suggestions like "thin
(1-2mm)", "standard (3-5mm)", "heavy-duty (6mm+)".
- BAD: "What is the minimum wall thickness in mm for the vertical ribs?"
- Frame questions in everyday language. Offer 2-4 plain-language suggestions
that map to concrete engineering values behind the scenes.
- Technical details (coordinates, exact radii, tolerance classes, feed rates)
are the agents' job to determine from the user's high-level answers.
"""
| # ── LLM call ────────────────────────────────────────────────────────────────── | |
def _build_llm():
    """Create the crewai ``LLM`` client used for gap analysis.

    The model name, temperature, and token limit are read from
    ``settings.gap_analysis`` each time this is called.

    Returns:
        A new ``crewai.LLM`` instance.
    """
    # crewai is imported at call time rather than at module import time.
    from crewai import LLM

    gap_cfg = settings.gap_analysis
    llm_kwargs = {
        "model": gap_cfg.model,
        "temperature": gap_cfg.temperature,
        "max_tokens": gap_cfg.max_tokens,
    }
    return LLM(**llm_kwargs)
def _build_prompt(
    agent_responses: list[AgentResponse],
    state: DesignState,
    user_message: str,
) -> str:
    """Assemble the user-role prompt for the gap analysis LLM call.

    The prompt is a newline-joined document with three sections, in order:
    "## Design State" (the state serialized as indented JSON),
    "## Agent Responses" (one "### <agent_id>" heading plus message per
    response, each followed by a blank line), and "## User Message".

    Args:
        agent_responses: Responses whose ``agent_id`` and ``message`` are
            rendered under the agent section.
        state: Current design state; serialized via ``model_dump_json``.
        user_message: Latest user text; an empty value renders as "(none)".

    Returns:
        The complete prompt text.
    """
    lines = [
        "## Design State",
        state.model_dump_json(indent=2),
        "",
        "## Agent Responses",
    ]

    for response in agent_responses:
        lines.extend((f"### {response.agent_id}", response.message, ""))

    # Falsy user input (empty string or None) is shown as a placeholder.
    shown_message = user_message if user_message else "(none)"
    lines.extend(("## User Message", shown_message))

    return "\n".join(lines)
def _enrich_agent_metadata(result: GapAnalysisResult) -> None:
    """Fill in ``agent_name`` and ``agent_color`` on every question card.

    Each card's ``responsible_agent`` is looked up in ``AGENTS``. When a
    definition is found its ``name`` and ``color`` are copied onto the card;
    otherwise the name falls back to the title-cased agent id and the color
    to a neutral grey. Cards are mutated in place.

    Args:
        result: Gap analysis result whose ``question_cards`` are updated.
    """
    fallback_color = "#888888"

    for card in result.question_cards:
        agent_key = card.responsible_agent
        known_agent = AGENTS.get(agent_key)

        if not known_agent:
            # Unknown agent id: derive a readable label from the id itself.
            card.agent_name = agent_key.title()
            card.agent_color = fallback_color
            continue

        card.agent_name = known_agent.name
        card.agent_color = known_agent.color
def analyze_gaps(
    agent_responses: list[AgentResponse],
    state: DesignState,
    user_message: str = "",
) -> GapAnalysisResult:
    """Ask the LLM which design information is still missing.

    Builds the LLM client and prompt, sends the system and user messages with
    ``GapAnalysisResult`` as the requested response model, and decorates the
    returned question cards with agent display metadata.

    Gap analysis is best-effort: any exception raised along the way is logged
    as a warning and an empty ``GapAnalysisResult`` is returned instead.

    Args:
        agent_responses: Agent responses to include in the prompt.
        state: Current design state to include in the prompt.
        user_message: Latest user message, if any.

    Returns:
        The parsed and enriched result, or an empty result on failure.
    """
    try:
        client = _build_llm()
        user_prompt = _build_prompt(agent_responses, state, user_message)

        raw = client.call(
            [
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            response_model=GapAnalysisResult,
        )

        if isinstance(raw, GapAnalysisResult):
            analysis = raw
        else:
            # The call did not hand back a model instance; treat the payload
            # as JSON text and validate it into the model.
            logger.debug("Gap analysis LLM returned non-model response, parsing as JSON")
            analysis = GapAnalysisResult.model_validate_json(raw)

        _enrich_agent_metadata(analysis)
    except Exception:
        # Deliberately broad: a failed analysis must not block the caller.
        logger.warning("Gap analysis LLM call failed", exc_info=True)
        return GapAnalysisResult()

    return analysis