# neuralcad/agents/gap_analyzer.py
# Daniel Tu
# feat: constrain agent questions to user-friendly abstraction level (#13)
# afd1605 unverified
"""LLM-based gap analyzer — semantically detects missing design information.
Uses a structured LLM call to analyze agent responses against the current
DesignState and generate contextual question cards for the UI.
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Literal
from pydantic import BaseModel, Field
if TYPE_CHECKING:
from agents.agent_flow import AgentResponse
from agents.design_state import DesignState
logger = logging.getLogger(__name__)
# ── Models ────────────────────────────────────────────────────────────────────
class DetectedGap(BaseModel):
"""A single piece of missing information identified by the LLM."""
category: str
description: str
severity: Literal["blocking", "recommended", "nice_to_have"]
class GeneratedQuestionCard(BaseModel):
"""A contextual question card for the UI, generated by the LLM."""
category: str
question: str
responsible_agent: str
agent_name: str = ""
agent_color: str = ""
severity: Literal["blocking", "recommended", "nice_to_have"] = "recommended"
suggestions: list[str] = Field(default_factory=list)
allow_custom: bool = True
class GapAnalysisResult(BaseModel):
"""Top-level structured response from the gap analysis LLM call."""
has_gaps: bool = False
gaps: list[DetectedGap] = Field(default_factory=list)
question_cards: list[GeneratedQuestionCard] = Field(default_factory=list)
from config.settings import settings
from agents.definitions import AGENTS
# ── System prompt ─────────────────────────────────────────────────────────────
_SYSTEM_PROMPT = """\
You are a design gap analyzer for a CNC part design system.
Given the current design state, agent responses, and user message, identify
what information is still missing to produce a complete CNC-ready design.
Rules:
- Do NOT flag gaps for information already present in the design state.
- Do NOT flag gaps for information the user just provided in their message.
- Do NOT ask about the part name — the system derives it automatically.
- Do NOT repeat questions that were already asked in previous turns.
- Invent descriptive category names (e.g. "bolt_pattern", "thermal_rating",
"load_capacity") — there is no fixed set.
- Use snake_case for category names.
- Severity: "blocking" = cannot proceed without it, "recommended" = should
have for quality, "nice_to_have" = would improve but not essential.
- Generate questions that reference the specific part being designed.
- Suggestions should be concrete values appropriate for the part context.
- Set responsible_agent to the agent best suited to handle the answer:
"design", "engineering", "cnc", "cad", or "cam".
- If no gaps exist, return has_gaps: false with empty lists.
- Set severity on each question card matching the gap it addresses.
Question abstraction level — CRITICAL:
- Ask questions a normal person can answer, NOT questions requiring CAD or
engineering expertise. The user describes INTENT; the agents derive the
technical implementation.
- NEVER ask for exact coordinates, precise radii, specific tolerance values,
G-code parameters, or CadQuery implementation details.
- GOOD: "Where should the mounting holes go?" with suggestions like
"one in each corner", "evenly spaced along the edges", "centered on top".
- BAD: "What are the precise X, Y, Z coordinates for the M4 mounting holes?"
- GOOD: "Should the edges be sharp or rounded?"
- BAD: "What fillet radius should be applied to the internal pocket edges?"
- GOOD: "How thick should the walls be?" with suggestions like "thin
(1-2mm)", "standard (3-5mm)", "heavy-duty (6mm+)".
- BAD: "What is the minimum wall thickness in mm for the vertical ribs?"
- Frame questions in everyday language. Offer 2-4 plain-language suggestions
that map to concrete engineering values behind the scenes.
- Technical details (coordinates, exact radii, tolerance classes, feed rates)
are the agents' job to determine from the user's high-level answers.
"""
# ── LLM call ──────────────────────────────────────────────────────────────────
def _build_llm():
    """Create the crewai LLM client used for gap analysis.

    The model name, temperature and max_tokens are read from
    ``settings.gap_analysis``. crewai is imported inside the function rather
    than at module load.

    Returns:
        A ``crewai.LLM`` instance configured from those three settings.
    """
    from crewai import LLM

    gap_cfg = settings.gap_analysis
    return LLM(
        model=gap_cfg.model,
        temperature=gap_cfg.temperature,
        max_tokens=gap_cfg.max_tokens,
    )
def _build_prompt(
    agent_responses: list[AgentResponse],
    state: DesignState,
    user_message: str,
) -> str:
    """Assemble the user-role prompt text for the gap analysis call.

    The prompt is a newline-joined document with three headed sections:
    the design state as indented JSON, one ``### <agent_id>`` sub-section per
    agent response, and the user's message.

    Args:
        agent_responses: Responses to include; each contributes its
            ``agent_id`` as a heading followed by its ``message``.
        state: Current design state, serialised with
            ``model_dump_json(indent=2)``.
        user_message: Latest user message; a falsy value is rendered as
            ``(none)``.

    Returns:
        The complete prompt string.
    """
    # Serialise the state first so work happens in the same order as the
    # sections appear in the output.
    state_json = state.model_dump_json(indent=2)

    response_lines: list[str] = []
    for reply in agent_responses:
        # Heading, body, then a blank line separating it from the next section.
        response_lines += [f"### {reply.agent_id}", reply.message, ""]

    sections = [
        "## Design State",
        state_json,
        "",
        "## Agent Responses",
        *response_lines,
        "## User Message",
        user_message if user_message else "(none)",
    ]
    return "\n".join(sections)
def _enrich_agent_metadata(result: GapAnalysisResult) -> None:
    """Fill in the display name and colour of every question card, in place.

    Each card's ``responsible_agent`` is looked up in ``AGENTS``. When a
    (truthy) definition is found, its ``name`` and ``color`` are copied onto
    the card. Otherwise the card falls back to the title-cased agent key and
    the colour ``#888888``.

    Args:
        result: Gap analysis result whose ``question_cards`` are mutated.
    """
    for card in result.question_cards:
        agent_def = AGENTS.get(card.responsible_agent)
        if not agent_def:
            # Unknown agent key: derive a readable name and use the fallback colour.
            card.agent_name = card.responsible_agent.title()
            card.agent_color = "#888888"
            continue
        card.agent_name = agent_def.name
        card.agent_color = agent_def.color
def analyze_gaps(
    agent_responses: list[AgentResponse],
    state: DesignState,
    user_message: str = "",
) -> GapAnalysisResult:
    """Ask the LLM which design information is still missing.

    Builds the LLM client and the prompt, sends the system prompt plus the
    user prompt with ``GapAnalysisResult`` as the response model, and fills in
    agent display metadata on the returned question cards.

    Gap analysis is non-blocking: any exception raised along the way is logged
    as a warning (with traceback) and an empty ``GapAnalysisResult`` is
    returned instead.

    Args:
        agent_responses: Agent responses to analyse.
        state: Current design state.
        user_message: Latest user message, if any.

    Returns:
        The detected gaps and question cards, or an empty result on failure.
    """
    try:
        llm = _build_llm()
        user_prompt = _build_prompt(agent_responses, state, user_message)
        raw = llm.call(
            [
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            response_model=GapAnalysisResult,
        )
        if isinstance(raw, GapAnalysisResult):
            analysis = raw
        else:
            # The call did not hand back a model instance; treat the value as
            # JSON text and validate it into the model.
            logger.debug("Gap analysis LLM returned non-model response, parsing as JSON")
            analysis = GapAnalysisResult.model_validate_json(raw)
        _enrich_agent_metadata(analysis)
    except Exception:
        logger.warning("Gap analysis LLM call failed", exc_info=True)
        return GapAnalysisResult()
    else:
        return analysis