Spaces:

CallMeDaniel
/

neuralcad

Sleeping

App Files Files Community

Daniel Tu committed on Apr 11

Commit

ca5b1f7

unverified ·

2 Parent(s): 8a01e00 33f166d

Merge pull request #3 from danghoangnhan/feat/oop-config-foundation

Browse files

Files changed (19) hide show

.env.example +9 -0
agents/orchestrator.py +3 -17
config.yaml +259 -0
config/__init__.py +0 -0
config/settings.py +89 -0
core/backend_factory.py +64 -0
core/backends.py +49 -100
core/serializers.py +38 -0
core/types.py +86 -0
core/validator.py +19 -3
docs/superpowers/plans/2026-04-11-oop-config-foundation.md +844 -0
docs/superpowers/specs/2026-04-11-oop-config-crewai-design.md +1232 -0
pyproject.toml +2 -0
server/mcp.py +9 -53
tests/test_backend_factory.py +29 -0
tests/test_serializers.py +66 -0
tests/test_settings.py +50 -0
tests/test_types.py +88 -0
uv.lock +4 -0

.env.example ADDED Viewed

	@@ -0,0 +1,9 @@

+# API keys (copy to .env and fill in)
+ANTHROPIC_API_KEY=
+OPENAI_API_KEY=
+GOOGLE_API_KEY=
+# Optional overrides
+# NEURALCAD_OUTPUT_DIR=./output
+# NEURALCAD_WEB_PORT=5000
+# NEURALCAD_MCP_PORT=8000

agents/orchestrator.py CHANGED Viewed

@@ -28,6 +28,7 @@ from agents.prompts import (
 from agents.design_state import DesignState, extract_decisions
 from core.backends import LLMBackend, MockBackend
 from core.executor import execute_cadquery, export_all
 from core.validator import validate_for_cnc
@@ -112,23 +113,8 @@ def _execute_cad_code(
         "part_name": part_name,
         "stl_url": f"/api/models/{part_name}.stl",
         "step_url": f"/api/models/{part_name}.step",
-        "execution": {
-            "success": True,
-            "volume_mm3": exec_result.volume,
-            "bounding_box_mm": list(exec_result.bounding_box),
-            "face_count": exec_result.face_count,
-            "edge_count": exec_result.edge_count,
-        },
-        "validation": {
-            "machinable": validation.machinable,
-            "axis_recommendation": validation.axis_recommendation,
-            "error_count": validation.error_count,
-            "warning_count": validation.warning_count,
-            "issues": [
-                {"severity": i.severity, "category": i.category, "message": i.message}
-                for i in validation.issues
-            ],
-        },
     }

 from agents.design_state import DesignState, extract_decisions
 from core.backends import LLMBackend, MockBackend
 from core.executor import execute_cadquery, export_all
+from core.serializers import ExecutionResultSerializer, ValidationResultSerializer
 from core.validator import validate_for_cnc
         "part_name": part_name,
         "stl_url": f"/api/models/{part_name}.stl",
         "step_url": f"/api/models/{part_name}.step",
+        "execution": ExecutionResultSerializer.to_dict(exec_result),
+        "validation": ValidationResultSerializer.to_dict(validation),
     }

config.yaml ADDED Viewed

	@@ -0,0 +1,259 @@

+server:
+  web_port: 5000
+  mcp_port: 8000
+  mcp_name: text-to-cnc
+  cors_origins: ["*"]
+  mcp_startup_wait_seconds: 2
+paths:
+  output_dir: ./output
+  web_dir: ./web
+  prompts_dir: ./agents/prompts
+backends:
+  default: mock
+  models:
+    anthropic: claude-sonnet-4-20250514
+    openai: gpt-4o
+    gemini: gemini-2.5-flash
+  max_tokens: 4096
+  temperature: 0.2
+orchestration:
+  max_history: 30
+  max_active_agents: 3
+  max_retries: 2
+  max_decisions: 20
+  max_recent_decisions: 5
+  part_name_max_chars: 40
+validation:
+  min_wall_thickness_mm: 1.5
+  min_fillet_radius_mm: 1.0
+  max_pocket_depth_ratio: 4.0
+  max_part_size_mm: 500.0
+  min_part_size_mm: 1.0
+  min_hole_diameter_mm: 1.0
+  complexity_thresholds:
+    five_axis_faces: 100
+    three_plus_two_faces: 50
+export:
+  stl_tolerance: 0.01
+agents:
+  design:
+    name: Design Agent
+    role: Industrial Designer
+    color: "#7c3aed"
+    avatar: DA
+    goal: >
+      Understand the user's intent and propose optimal form factors,
+      shapes, and aesthetic choices for mechanical parts.
+    backstory: >
+      You are an experienced industrial designer specializing in mechanical
+      parts. You think about form, function, ergonomics, and visual appeal.
+      You ask clarifying questions about the part's purpose, environment,
+      and constraints before proposing designs. You suggest shapes,
+      proportions, and features that balance aesthetics with manufacturability.
+  engineering:
+    name: Engineering Agent
+    role: Mechanical Engineer
+    color: "#00b4d8"
+    avatar: EA
+    goal: >
+      Ensure parts are structurally sound with correct dimensions,
+      tolerances, materials, and fastener specifications.
+    backstory: >
+      You are a senior mechanical engineer with deep knowledge of materials
+      science, stress analysis, and fastener standards. You specify wall
+      thicknesses, fillet radii, clearance holes (M3=3.4mm, M4=4.5mm,
+      M5=5.5mm, M6=6.6mm, M8=9.0mm), and material recommendations. You
+      flag structural concerns and suggest reinforcements like ribs or
+      gussets when loads are significant.
+  cnc:
+    name: CNC Agent
+    role: CNC Manufacturing Advisor
+    color: "#00e676"
+    avatar: CA
+    goal: >
+      Advise on manufacturability: tool access, wall thickness limits,
+      pocket ratios, axis requirements, and cost implications.
+    backstory: >
+      You are a CNC machinist with 20 years of shop floor experience.
+      You know what tool geometries can reach, what aspect ratios cause
+      chatter, and when to recommend 3-axis vs 3+2 vs 5-axis. You flag
+      undercuts, thin walls (<1.5mm), deep pockets (>4:1 ratio), and
+      features that need special fixturing. You think about setup count
+      and machining time.
+  cad:
+    name: CAD Coder
+    role: CadQuery Code Generator
+    color: "#ffab40"
+    avatar: CC
+    goal: >
+      Generate valid CadQuery Python code that produces the agreed-upon 3D model.
+    backstory: >
+      You are an expert CadQuery programmer. You only speak when asked to
+      generate a preview or produce code. You take the design specifications
+      agreed upon by the team and translate them into precise CadQuery Python
+      code. Your code always assigns the result to a variable called `result`
+      as a cq.Workplane object.
+routing:
+  cad_trigger_keywords:
+    - generate
+    - build
+    - build it
+    - preview
+    - show me
+    - create
+    - create the model
+    - model it
+    - render
+    - code
+    - make it
+    - produce
+  keywords:
+    design:
+      - design
+      - look
+      - shape
+      - style
+      - form
+      - aesthetic
+      - appearance
+      - layout
+      - concept
+      - idea
+      - propose
+      - suggest
+      - bracket
+      - mount
+      - enclosure
+      - housing
+      - ergonomic
+      - profile
+      - contour
+    engineering:
+      - dimension
+      - tolerance
+      - material
+      - strength
+      - load
+      - stress
+      - thickness
+      - wall
+      - fillet
+      - radius
+      - clearance
+      - m2
+      - m3
+      - m4
+      - m5
+      - m6
+      - m8
+      - m10
+      - m12
+      - aluminum
+      - steel
+      - brass
+      - titanium
+      - nylon
+      - gear
+      - bearing
+      - flange
+      - heatsink
+      - fin
+      - rib
+      - bolt
+      - screw
+      - thread
+      - torque
+      - deflection
+      - hole
+      - bore
+      - shaft
+      - keyway
+      - spline
+    cnc:
+      - machine
+      - mill
+      - cnc
+      - manufacture
+      - machinable
+      - axis
+      - tool
+      - fixture
+      - setup
+      - pocket
+      - undercut
+      - access
+      - 3-axis
+      - 5-axis
+      - cost
+      - surface finish
+      - roughness
+      - endmill
+      - drill
+      - tap
+      - chamfer tool
+      - deburr
+      - setup count
+      - cycle time
+      - tolerance class
+materials:
+  - aluminum
+  - aluminium
+  - steel
+  - stainless steel
+  - brass
+  - copper
+  - titanium
+  - nylon
+  - delrin
+  - acetal
+  - abs
+  - polycarbonate
+  - peek
+material_grades:
+  "6061": aluminum 6061
+  "7075": aluminum 7075
+  "304": stainless steel 304
+  "316": stainless steel 316
+  t6: aluminum 6061-T6
+dimension_contexts:
+  wide: width
+  width: width
+  tall: height
+  height: height
+  high: height
+  thick: thickness
+  thickness: thickness
+  deep: depth
+  depth: depth
+  long: length
+  length: length
+  diameter: diameter
+  dia: diameter
+  radius: radius
+  arm: arm_length
+fasteners:
+  M2: 2.4
+  M3: 3.4
+  M4: 4.5
+  M5: 5.5
+  M6: 6.6
+  M8: 9.0
+  M10: 11.0
+  M12: 13.5
+fallback_messages:
+  design: "I'd love to help shape this design. Could you describe the part's purpose and any size constraints?"
+  engineering: "I can help with the structural details. What material and load conditions are we working with?"
+  cnc: "I'll check manufacturability once we have more design details. Any machining preferences (3-axis, 5-axis)?"
+  cad: "I'm ready to generate the model once the design is agreed upon. Say 'preview' when you're ready."

config/__init__.py ADDED Viewed

File without changes

config/settings.py ADDED Viewed

	@@ -0,0 +1,89 @@

+"""Centralized configuration — single source of truth for all NeuralCAD settings."""
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+import yaml
+from pydantic import Field
+from pydantic_settings import BaseSettings
+class Settings(BaseSettings):
+    """Loads .env for secrets, then overlays config.yaml for app config."""
+    # .env secrets
+    anthropic_api_key: str = ""
+    openai_api_key: str = ""
+    google_api_key: str = ""
+    # Overridable via env vars
+    neuralcad_output_dir: str = ""
+    neuralcad_web_port: int = 0
+    neuralcad_mcp_port: int = 0
+    # Loaded from config.yaml
+    server: dict[str, Any] = Field(default_factory=dict)
+    paths: dict[str, Any] = Field(default_factory=dict)
+    backends: dict[str, Any] = Field(default_factory=dict)
+    orchestration: dict[str, Any] = Field(default_factory=dict)
+    validation: dict[str, Any] = Field(default_factory=dict)
+    export: dict[str, Any] = Field(default_factory=dict)
+    agents: dict[str, Any] = Field(default_factory=dict)
+    routing: dict[str, Any] = Field(default_factory=dict)
+    materials: list[str] = Field(default_factory=list)
+    material_grades: dict[str, str] = Field(default_factory=dict)
+    dimension_contexts: dict[str, str] = Field(default_factory=dict)
+    fasteners: dict[str, float] = Field(default_factory=dict)
+    fallback_messages: dict[str, str] = Field(default_factory=dict)
+    model_config = {"env_file": ".env", "extra": "ignore"}
+    def model_post_init(self, __context: Any) -> None:
+        config_path = Path(__file__).parent.parent / "config.yaml"
+        if config_path.exists():
+            with open(config_path) as f:
+                data = yaml.safe_load(f) or {}
+            for key, value in data.items():
+                if hasattr(self, key):
+                    current = getattr(self, key)
+                    if not current or (isinstance(current, (dict, list)) and len(current) == 0):
+                        object.__setattr__(self, key, value)
+    @property
+    def output_dir(self) -> Path:
+        if self.neuralcad_output_dir:
+            return Path(self.neuralcad_output_dir)
+        return Path(self.paths.get("output_dir", "./output"))
+    @property
+    def web_port(self) -> int:
+        if self.neuralcad_web_port:
+            return self.neuralcad_web_port
+        return self.server.get("web_port", 5000)
+    @property
+    def mcp_port(self) -> int:
+        if self.neuralcad_mcp_port:
+            return self.neuralcad_mcp_port
+        return self.server.get("mcp_port", 8000)
+    @property
+    def default_backend(self) -> str:
+        return self.backends.get("default", "mock")
+    @property
+    def model_for(self) -> dict[str, str]:
+        return self.backends.get("models", {})
+    @property
+    def max_tokens(self) -> int:
+        return self.backends.get("max_tokens", 4096)
+    @property
+    def temperature(self) -> float:
+        return self.backends.get("temperature", 0.2)
+settings = Settings()

core/backend_factory.py ADDED Viewed

	@@ -0,0 +1,64 @@

+"""Backend factory — centralized creation of LLM backends.
+Replaces scattered if/elif backend selection across mcp.py, routes.py, web.py.
+"""
+from __future__ import annotations
+import logging
+from core.types import LLMBackend
+logger = logging.getLogger(__name__)
+class BackendFactory:
+    """Registry and factory for LLM backends."""
+    _registry: dict[str, type[LLMBackend]] = {}
+    @classmethod
+    def register(cls, name: str, backend_cls: type[LLMBackend]) -> None:
+        cls._registry[name] = backend_cls
+    @classmethod
+    def create(cls, name: str, **kwargs) -> LLMBackend:
+        if name not in cls._registry:
+            raise ValueError(f"Unknown backend: {name!r}. Available: {list(cls._registry.keys())}")
+        return cls._registry[name](**kwargs)
+    @classmethod
+    def create_safe(cls, name: str, **kwargs) -> LLMBackend:
+        """Create backend, falling back to mock on failure."""
+        try:
+            return cls.create(name, **kwargs)
+        except Exception as exc:
+            logger.warning("Backend %r unavailable (%s), falling back to mock", name, exc)
+            return cls.create("mock")
+def _register_defaults() -> None:
+    """Register all built-in backends. Called at module load."""
+    from core.backends import MockBackend
+    BackendFactory.register("mock", MockBackend)
+    try:
+        from core.backends import AnthropicBackend
+        BackendFactory.register("anthropic", AnthropicBackend)
+    except Exception:
+        pass
+    try:
+        from core.backends import OpenAIBackend
+        BackendFactory.register("openai", OpenAIBackend)
+    except Exception:
+        pass
+    try:
+        from core.backends import GeminiBackend
+        BackendFactory.register("gemini", GeminiBackend)
+    except Exception:
+        pass
+_register_defaults()

core/backends.py CHANGED Viewed

@@ -14,92 +14,49 @@ import mimetypes
 import os
 import re
 from pathlib import Path
-from typing import Optional
-# ── LLM Backends ──────────────────────────────────────────────────────────
-class LLMBackend:
-    """Base class for LLM code generation backends."""
-    def generate(self, messages: list[dict]) -> str:
-        raise NotImplementedError
-    def generate_with_image(self, messages: list[dict], image_path: str | Path) -> str:
-        """Generate code from messages that include an image.
-        Override in backends that support vision."""
-        raise NotImplementedError(
-            f"{self.__class__.__name__} does not support image input"
-        )
 class AnthropicBackend(LLMBackend):
     """Generate CadQuery code using Anthropic Claude."""
-    def __init__(
-        self, model: str = "claude-sonnet-4-20250514", api_key: Optional[str] = None
-    ):
         import anthropic
-        self.client = anthropic.Anthropic(
-            api_key=api_key or os.environ.get("ANTHROPIC_API_KEY")
-        )
-        self.model = model
     def generate(self, messages: list[dict]) -> str:
-        # Anthropic uses system param separately
-        system_msg = ""
-        user_messages = []
-        for m in messages:
-            if m["role"] == "system":
-                system_msg = m["content"]
-            else:
-                user_messages.append(m)
         response = self.client.messages.create(
             model=self.model,
-            max_tokens=4096,
             system=system_msg,
             messages=user_messages,
         )
         return response.content[0].text
     def generate_with_image(self, messages: list[dict], image_path: str | Path) -> str:
         image_path = Path(image_path)
         media_type = mimetypes.guess_type(str(image_path))[0] or "image/png"
         image_data = base64.b64encode(image_path.read_bytes()).decode("utf-8")
-        system_msg = ""
-        user_messages = []
-        for m in messages:
-            if m["role"] == "system":
-                system_msg = m["content"]
-            else:
-                msg = dict(m)
-                # Inject image into the last user message
-                if msg["role"] == "user" and msg is not m:
-                    user_messages.append(msg)
-                else:
-                    user_messages.append(msg)
         # Replace last user message content with multimodal blocks
         last_user = user_messages[-1]
         last_user["content"] = [
-            {
-                "type": "image",
-                "source": {
-                    "type": "base64",
-                    "media_type": media_type,
-                    "data": image_data,
-                },
-            },
             {"type": "text", "text": last_user["content"]},
         ]
         response = self.client.messages.create(
             model=self.model,
-            max_tokens=4096,
             system=system_msg,
             messages=user_messages,
         )
@@ -109,22 +66,25 @@ class AnthropicBackend(LLMBackend):
 class OpenAIBackend(LLMBackend):
     """Generate CadQuery code using OpenAI GPT-4o."""
-    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
         import openai
-        self.client = openai.OpenAI(api_key=api_key or os.environ.get("OPENAI_API_KEY"))
-        self.model = model
     def generate(self, messages: list[dict]) -> str:
         response = self.client.chat.completions.create(
             model=self.model,
             messages=messages,
-            max_tokens=4096,
-            temperature=0.2,
         )
         return response.choices[0].message.content
     def generate_with_image(self, messages: list[dict], image_path: str | Path) -> str:
         image_path = Path(image_path)
         media_type = mimetypes.guess_type(str(image_path))[0] or "image/png"
         image_data = base64.b64encode(image_path.read_bytes()).decode("utf-8")
@@ -141,8 +101,8 @@ class OpenAIBackend(LLMBackend):
         response = self.client.chat.completions.create(
             model=self.model,
             messages=patched,
-            max_tokens=4096,
-            temperature=0.2,
         )
         return response.choices[0].message.content
@@ -150,50 +110,46 @@ class OpenAIBackend(LLMBackend):
 class GeminiBackend(LLMBackend):
     """Generate CadQuery code using Google Gemini (free tier available)."""
-    def __init__(self, model: str = "gemini-2.5-flash", api_key: Optional[str] = None):
         from google import genai
-        self.client = genai.Client(api_key=api_key or os.environ.get("GEMINI_API_KEY"))
-        self.model = model
     def generate(self, messages: list[dict]) -> str:
-        # Convert messages to Gemini format: system instruction + contents
-        system_msg = ""
         contents = []
-        for m in messages:
-            if m["role"] == "system":
-                system_msg = m["content"]
-            elif m["role"] == "user":
                 contents.append({"role": "user", "parts": [{"text": m["content"]}]})
             elif m["role"] == "assistant":
                 contents.append({"role": "model", "parts": [{"text": m["content"]}]})
-        from google.genai import types
         response = self.client.models.generate_content(
             model=self.model,
             contents=contents,
             config=types.GenerateContentConfig(
                 system_instruction=system_msg,
-                max_output_tokens=4096,
-                temperature=0.2,
             ),
         )
         return response.text
     def generate_with_image(self, messages: list[dict], image_path: str | Path) -> str:
         from google.genai import types
         image_path = Path(image_path)
         image_data = image_path.read_bytes()
         media_type = mimetypes.guess_type(str(image_path))[0] or "image/png"
-        system_msg = ""
         contents = []
-        for m in messages:
-            if m["role"] == "system":
-                system_msg = m["content"]
-            elif m["role"] == "user":
                 contents.append({"role": "user", "parts": [{"text": m["content"]}]})
             elif m["role"] == "assistant":
                 contents.append({"role": "model", "parts": [{"text": m["content"]}]})
@@ -209,8 +165,8 @@ class GeminiBackend(LLMBackend):
             contents=contents,
             config=types.GenerateContentConfig(
                 system_instruction=system_msg,
-                max_output_tokens=4096,
-                temperature=0.2,
             ),
         )
         return response.text
@@ -240,18 +196,6 @@ class MockBackend(LLMBackend):
         "twenty": 20,
     }
-    # Metric thread clearance hole diameters
-    _THREAD_CLEARANCE = {
-        "m2": 2.4,
-        "m3": 3.4,
-        "m4": 4.5,
-        "m5": 5.5,
-        "m6": 6.6,
-        "m8": 9.0,
-        "m10": 11.0,
-        "m12": 13.5,
-    }
     # Shape detection patterns → base shape key
     _SHAPE_PATTERNS = {
         "cylinder": [
@@ -312,6 +256,11 @@ class MockBackend(LLMBackend):
         "boss": ["boss", "bosses", "standoff", "standoffs", "pillar"],
     }
     def _parse_prompt(self, text: str) -> dict:
         """Extract dimensions, shape, and features from natural language."""
         lower = text.lower()
@@ -325,7 +274,7 @@ class MockBackend(LLMBackend):
         hole_dia = None
         if thread_match:
             key = f"m{thread_match.group(1)}"
-            hole_dia = self._THREAD_CLEARANCE.get(
                 key, float(thread_match.group(1)) * 1.1
             )

 import os
 import re
 from pathlib import Path
+from core.types import LLMBackend
+# ── LLM Backends ──────────────────────────────────────────────────────────
 class AnthropicBackend(LLMBackend):
     """Generate CadQuery code using Anthropic Claude."""
+    def __init__(self, model: str | None = None, api_key: str | None = None):
         import anthropic
+        from config.settings import settings
+        self.model = model or settings.model_for.get("anthropic", "claude-sonnet-4-20250514")
+        key = api_key or settings.anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY")
+        self.client = anthropic.Anthropic(api_key=key)
     def generate(self, messages: list[dict]) -> str:
+        from config.settings import settings
+        system_msg, user_messages = self.split_system_message(messages)
         response = self.client.messages.create(
             model=self.model,
+            max_tokens=settings.max_tokens,
             system=system_msg,
             messages=user_messages,
         )
         return response.content[0].text
     def generate_with_image(self, messages: list[dict], image_path: str | Path) -> str:
+        from config.settings import settings
         image_path = Path(image_path)
         media_type = mimetypes.guess_type(str(image_path))[0] or "image/png"
         image_data = base64.b64encode(image_path.read_bytes()).decode("utf-8")
+        system_msg, user_messages = self.split_system_message(messages)
         # Replace last user message content with multimodal blocks
         last_user = user_messages[-1]
         last_user["content"] = [
+            {"type": "image", "source": {"type": "base64", "media_type": media_type, "data": image_data}},
             {"type": "text", "text": last_user["content"]},
         ]
         response = self.client.messages.create(
             model=self.model,
+            max_tokens=settings.max_tokens,
             system=system_msg,
             messages=user_messages,
         )
 class OpenAIBackend(LLMBackend):
     """Generate CadQuery code using OpenAI GPT-4o."""
+    def __init__(self, model: str | None = None, api_key: str | None = None):
         import openai
+        from config.settings import settings
+        self.model = model or settings.model_for.get("openai", "gpt-4o")
+        key = api_key or settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
+        self.client = openai.OpenAI(api_key=key)
     def generate(self, messages: list[dict]) -> str:
+        from config.settings import settings
         response = self.client.chat.completions.create(
             model=self.model,
             messages=messages,
+            max_tokens=settings.max_tokens,
+            temperature=settings.temperature,
         )
         return response.choices[0].message.content
     def generate_with_image(self, messages: list[dict], image_path: str | Path) -> str:
+        from config.settings import settings
         image_path = Path(image_path)
         media_type = mimetypes.guess_type(str(image_path))[0] or "image/png"
         image_data = base64.b64encode(image_path.read_bytes()).decode("utf-8")
         response = self.client.chat.completions.create(
             model=self.model,
             messages=patched,
+            max_tokens=settings.max_tokens,
+            temperature=settings.temperature,
         )
         return response.choices[0].message.content
 class GeminiBackend(LLMBackend):
     """Generate CadQuery code using Google Gemini (free tier available)."""
+    def __init__(self, model: str | None = None, api_key: str | None = None):
         from google import genai
+        from config.settings import settings
+        self.model = model or settings.model_for.get("gemini", "gemini-2.5-flash")
+        key = api_key or settings.google_api_key or os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
+        self.client = genai.Client(api_key=key)
     def generate(self, messages: list[dict]) -> str:
+        from config.settings import settings
+        from google.genai import types
+        system_msg, other_messages = self.split_system_message(messages)
         contents = []
+        for m in other_messages:
+            if m["role"] == "user":
                 contents.append({"role": "user", "parts": [{"text": m["content"]}]})
             elif m["role"] == "assistant":
                 contents.append({"role": "model", "parts": [{"text": m["content"]}]})
         response = self.client.models.generate_content(
             model=self.model,
             contents=contents,
             config=types.GenerateContentConfig(
                 system_instruction=system_msg,
+                max_output_tokens=settings.max_tokens,
+                temperature=settings.temperature,
             ),
         )
         return response.text
     def generate_with_image(self, messages: list[dict], image_path: str | Path) -> str:
+        from config.settings import settings
         from google.genai import types
         image_path = Path(image_path)
         image_data = image_path.read_bytes()
         media_type = mimetypes.guess_type(str(image_path))[0] or "image/png"
+        system_msg, other_messages = self.split_system_message(messages)
         contents = []
+        for m in other_messages:
+            if m["role"] == "user":
                 contents.append({"role": "user", "parts": [{"text": m["content"]}]})
             elif m["role"] == "assistant":
                 contents.append({"role": "model", "parts": [{"text": m["content"]}]})
             contents=contents,
             config=types.GenerateContentConfig(
                 system_instruction=system_msg,
+                max_output_tokens=settings.max_tokens,
+                temperature=settings.temperature,
             ),
         )
         return response.text
         "twenty": 20,
     }
     # Shape detection patterns → base shape key
     _SHAPE_PATTERNS = {
         "cylinder": [
         "boss": ["boss", "bosses", "standoff", "standoffs", "pillar"],
     }
+    @property
+    def _thread_clearance(self) -> dict[str, float]:
+        from config.settings import settings
+        return settings.fasteners
     def _parse_prompt(self, text: str) -> dict:
         """Extract dimensions, shape, and features from natural language."""
         lower = text.lower()
         hole_dia = None
         if thread_match:
             key = f"m{thread_match.group(1)}"
+            hole_dia = self._thread_clearance.get(
                 key, float(thread_match.group(1)) * 1.1
             )

core/serializers.py ADDED Viewed

	@@ -0,0 +1,38 @@

+"""Serializers for execution and validation results.
+Eliminates duplicated dict-building code across mcp.py and orchestrator.py.
+"""
+from __future__ import annotations
+class ExecutionResultSerializer:
+    """Serialize ExecutionResult to JSON-ready dict."""
+    @staticmethod
+    def to_dict(result) -> dict:
+        return {
+            "success": result.success,
+            "volume_mm3": result.volume,
+            "bounding_box_mm": list(result.bounding_box) if result.bounding_box else [],
+            "face_count": result.face_count,
+            "edge_count": result.edge_count,
+            "error": result.error,
+        }
+class ValidationResultSerializer:
+    """Serialize CNCValidationResult to JSON-ready dict."""
+    @staticmethod
+    def to_dict(result) -> dict:
+        return {
+            "machinable": result.machinable,
+            "axis_recommendation": result.axis_recommendation,
+            "error_count": result.error_count,
+            "warning_count": result.warning_count,
+            "issues": [
+                {"severity": i.severity, "category": i.category, "message": i.message}
+                for i in result.issues
+            ],
+        }

core/types.py ADDED Viewed

	@@ -0,0 +1,86 @@

+"""Shared types, enums, dataclasses, and ABCs for NeuralCAD."""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from enum import Enum
+from pathlib import Path
+from typing import Optional
+class BackendName(str, Enum):
+    """String-valued identifiers of the LLM backends."""
+    # Each value is the plain string name of one backend.
+    MOCK = "mock"
+    ANTHROPIC = "anthropic"
+    OPENAI = "openai"
+    GEMINI = "gemini"
+class AgentId(str, Enum):
+    """String-valued identifiers of the chat agents."""
+    # Each value is the plain string id of one agent.
+    DESIGN = "design"
+    ENGINEERING = "engineering"
+    CNC = "cnc"
+    CAD = "cad"
+@dataclass
+class AgentResponse:
+    """A single agent's response in a chat turn."""
+    agent_id: str
+    agent_name: str
+    message: str
+    color: str
+    avatar: str
+    code: Optional[str] = None
+    def to_dict(self) -> dict:
+        return {
+            "agent_id": self.agent_id,
+            "agent_name": self.agent_name,
+            "message": self.message,
+            "color": self.color,
+            "avatar": self.avatar,
+            "code": self.code,
+        }
+@dataclass
+class ChatResult:
+    """Result of a multi-agent chat turn."""
+    responses: list[AgentResponse]
+    preview: Optional[dict] = None
+    design_state: dict = field(default_factory=dict)
+    def to_dict(self) -> dict:
+        return {
+            "responses": [r.to_dict() for r in self.responses],
+            "preview": self.preview,
+            "design_state": self.design_state,
+        }
+class LLMBackend(ABC):
+    """Abstract base class for LLM code generation backends."""
+    @abstractmethod
+    def generate(self, messages: list[dict]) -> str:
+        """Generate text from a list of messages."""
+        ...
+    def generate_with_image(self, messages: list[dict], image_path: str | Path) -> str:
+        """Generate text from messages that include an image."""
+        raise NotImplementedError(
+            f"{type(self).__name__} does not support image input"
+        )
+    @staticmethod
+    def split_system_message(messages: list[dict]) -> tuple[str, list[dict]]:
+        """Extract system message from a message list."""
+        system_msg = ""
+        user_messages = []
+        for m in messages:
+            if m["role"] == "system":
+                system_msg = m["content"]
+            else:
+                user_messages.append(m)
+        return system_msg, user_messages

core/validator.py CHANGED Viewed

@@ -61,6 +61,17 @@ DEFAULT_CONFIG = {
 }
 def validate_for_cnc(
     workplane: cq.Workplane,
     part_name: str = "Part",
@@ -70,7 +81,7 @@ def validate_for_cnc(
     Run manufacturability checks on a CadQuery solid.
     Returns a CNCValidationResult with issues found.
     """
-    cfg = {**DEFAULT_CONFIG, **(config or {})}
     result = CNCValidationResult(part_name=part_name)
     shape = workplane.val()
     bb = shape.BoundingBox()
@@ -127,7 +138,12 @@ def validate_for_cnc(
     n_faces = len(faces)
     n_edges = len(edges)
-    if n_faces > 100:
         result.issues.append(
             CNCIssue(
                 "warning",
@@ -136,7 +152,7 @@ def validate_for_cnc(
             )
         )
         result.axis_recommendation = "5-axis"
-    elif n_faces > 50:
         result.issues.append(
             CNCIssue(
                 "info",

 }
+def _get_validation_config(overrides: dict | None = None) -> dict:
+    """Build validation config from settings + optional overrides."""
+    from config.settings import settings
+    base = dict(settings.validation)
+    # Remove nested dicts (like complexity_thresholds) from the flat config
+    base.pop("complexity_thresholds", None)
+    if overrides:
+        base.update(overrides)
+    return base
 def validate_for_cnc(
     workplane: cq.Workplane,
     part_name: str = "Part",
     Run manufacturability checks on a CadQuery solid.
     Returns a CNCValidationResult with issues found.
     """
+    cfg = _get_validation_config(config)
     result = CNCValidationResult(part_name=part_name)
     shape = workplane.val()
     bb = shape.BoundingBox()
     n_faces = len(faces)
     n_edges = len(edges)
+    from config.settings import settings
+    complexity = settings.validation.get("complexity_thresholds", {})
+    five_axis_faces = complexity.get("five_axis_faces", 100)
+    three_plus_two_faces = complexity.get("three_plus_two_faces", 50)
+    if n_faces > five_axis_faces:
         result.issues.append(
             CNCIssue(
                 "warning",
             )
         )
         result.axis_recommendation = "5-axis"
+    elif n_faces > three_plus_two_faces:
         result.issues.append(
             CNCIssue(
                 "info",

docs/superpowers/plans/2026-04-11-oop-config-foundation.md ADDED Viewed

	@@ -0,0 +1,844 @@

+# OOP Config Foundation Implementation Plan
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+**Goal:** Extract all hardcoded values to config.yaml/.env, add proper ABCs and type system, config-drive all backends, add backend factory, and extract serializers.
+**Architecture:** A Pydantic Settings singleton loads .env secrets and config.yaml application config. All modules import from this instead of defining their own constants. LLMBackend becomes a proper ABC. BackendFactory replaces scattered if/elif backend selection. Serializers eliminate 7x duplication.
+**Tech Stack:** pydantic-settings, pyyaml, abc, enum, dataclasses
+---
+### Task 1: Add Dependencies
+**Files:**
+- Modify: `pyproject.toml`
+- [ ] **Step 1: Add pydantic-settings and pyyaml to dependencies**
+Add to the dependencies list in pyproject.toml:
+```toml
+    "pydantic-settings>=2.0.0",
+    "pyyaml>=6.0",
+```
+- [ ] **Step 2: Install**
+Run: `cd /home/daniel/NeuralCAD && uv sync`
+- [ ] **Step 3: Commit**
+```bash
+git add pyproject.toml uv.lock
+git commit -m "build: add pydantic-settings and pyyaml dependencies"
+```
+---
+### Task 2: Create config.yaml and .env.example
+**Files:**
+- Create: `config.yaml`
+- Create: `.env.example`
+- [ ] **Step 1: Create config.yaml**
+Create `/home/daniel/NeuralCAD/config.yaml` with the full YAML content from spec section 1.2 (server, paths, backends, orchestration, validation, export, agents, routing, materials, material_grades, dimension_contexts, fasteners, fallback_messages). This is the single source of truth for all non-secret application configuration.
+- [ ] **Step 2: Create .env.example**
+```
+# API keys (copy to .env and fill in)
+ANTHROPIC_API_KEY=
+OPENAI_API_KEY=
+GOOGLE_API_KEY=
+# Optional overrides
+# NEURALCAD_OUTPUT_DIR=./output
+# NEURALCAD_WEB_PORT=5000
+# NEURALCAD_MCP_PORT=8000
+```
+- [ ] **Step 3: Add .env to .gitignore**
+Append `.env` to `.gitignore` if not already there.
+- [ ] **Step 4: Commit**
+```bash
+git add config.yaml .env.example .gitignore
+git commit -m "config: add config.yaml and .env.example"
+```
+---
+### Task 3: Create Settings Singleton
+**Files:**
+- Create: `config/__init__.py`
+- Create: `config/settings.py`
+- Test: `tests/test_settings.py`
+- [ ] **Step 1: Write test for Settings**
+```python
+"""Tests for config/settings.py."""
+import pytest
+from pathlib import Path
+class TestSettings:
+    def test_loads_config_yaml(self):
+        from config.settings import settings
+        assert settings.agents  # Should have loaded agents from config.yaml
+        assert "design" in settings.agents
+    def test_output_dir_property(self):
+        from config.settings import settings
+        assert isinstance(settings.output_dir, Path)
+    def test_web_port_property(self):
+        from config.settings import settings
+        assert isinstance(settings.web_port, int)
+        assert settings.web_port > 0
+    def test_model_for_property(self):
+        from config.settings import settings
+        models = settings.model_for
+        assert "anthropic" in models
+        assert "openai" in models
+        assert "gemini" in models
+    def test_max_tokens_property(self):
+        from config.settings import settings
+        assert settings.max_tokens == 4096
+    def test_temperature_property(self):
+        from config.settings import settings
+        assert settings.temperature == 0.2
+    def test_validation_config(self):
+        from config.settings import settings
+        assert settings.validation["min_wall_thickness_mm"] == 1.5
+    def test_routing_keywords_loaded(self):
+        from config.settings import settings
+        assert "design" in settings.routing["keywords"]
+        assert "engineering" in settings.routing["keywords"]
+    def test_fasteners_loaded(self):
+        from config.settings import settings
+        assert settings.fasteners["M6"] == 6.6
+    def test_materials_loaded(self):
+        from config.settings import settings
+        assert "aluminum" in settings.materials
+```
+- [ ] **Step 2: Run test to verify it fails**
+Run: `python -m pytest tests/test_settings.py -v`
+Expected: FAIL (config module doesn't exist)
+- [ ] **Step 3: Create config/__init__.py**
+Empty file.
+- [ ] **Step 4: Create config/settings.py**
+```python
+"""Centralized configuration — single source of truth for all NeuralCAD settings."""
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+import yaml
+from pydantic import Field
+from pydantic_settings import BaseSettings
+class Settings(BaseSettings):
+    """Loads .env for secrets, then overlays config.yaml for app config."""
+    # .env secrets
+    anthropic_api_key: str = ""
+    openai_api_key: str = ""
+    google_api_key: str = ""
+    # Overridable via env vars
+    neuralcad_output_dir: str = ""
+    neuralcad_web_port: int = 0
+    neuralcad_mcp_port: int = 0
+    # Loaded from config.yaml
+    server: dict[str, Any] = Field(default_factory=dict)
+    paths: dict[str, Any] = Field(default_factory=dict)
+    backends: dict[str, Any] = Field(default_factory=dict)
+    orchestration: dict[str, Any] = Field(default_factory=dict)
+    validation: dict[str, Any] = Field(default_factory=dict)
+    export: dict[str, Any] = Field(default_factory=dict)
+    agents: dict[str, Any] = Field(default_factory=dict)
+    routing: dict[str, Any] = Field(default_factory=dict)
+    materials: list[str] = Field(default_factory=list)
+    material_grades: dict[str, str] = Field(default_factory=dict)
+    dimension_contexts: dict[str, str] = Field(default_factory=dict)
+    fasteners: dict[str, float] = Field(default_factory=dict)
+    fallback_messages: dict[str, str] = Field(default_factory=dict)
+    model_config = {"env_file": ".env", "extra": "ignore"}
+    def model_post_init(self, __context: Any) -> None:
+        config_path = Path(__file__).parent.parent / "config.yaml"
+        if config_path.exists():
+            with open(config_path) as f:
+                data = yaml.safe_load(f) or {}
+            for key, value in data.items():
+                if hasattr(self, key):
+                    current = getattr(self, key)
+                    # Only override if current value is empty/default
+                    if not current or (isinstance(current, (dict, list)) and len(current) == 0):
+                        object.__setattr__(self, key, value)
+    # ── Convenience properties ──────────────────────────────────────────
+    @property
+    def output_dir(self) -> Path:
+        if self.neuralcad_output_dir:
+            return Path(self.neuralcad_output_dir)
+        return Path(self.paths.get("output_dir", "./output"))
+    @property
+    def web_port(self) -> int:
+        if self.neuralcad_web_port:
+            return self.neuralcad_web_port
+        return self.server.get("web_port", 5000)
+    @property
+    def mcp_port(self) -> int:
+        if self.neuralcad_mcp_port:
+            return self.neuralcad_mcp_port
+        return self.server.get("mcp_port", 8000)
+    @property
+    def default_backend(self) -> str:
+        return self.backends.get("default", "mock")
+    @property
+    def model_for(self) -> dict[str, str]:
+        return self.backends.get("models", {})
+    @property
+    def max_tokens(self) -> int:
+        return self.backends.get("max_tokens", 4096)
+    @property
+    def temperature(self) -> float:
+        return self.backends.get("temperature", 0.2)
+settings = Settings()
+```
+- [ ] **Step 5: Run tests**
+Run: `python -m pytest tests/test_settings.py -v`
+Expected: All PASS
+- [ ] **Step 6: Commit**
+```bash
+git add config/__init__.py config/settings.py tests/test_settings.py
+git commit -m "feat: add Settings singleton with config.yaml + .env loading"
+```
+---
+### Task 4: Create Type System (Enums, Dataclasses, ABC)
+**Files:**
+- Create: `core/types.py`
+- Test: `tests/test_types.py`
+- [ ] **Step 1: Write tests**
+```python
+"""Tests for core/types.py — enums, dataclasses, ABC."""
+from core.types import BackendName, AgentId, AgentResponse, ChatResult, LLMBackend
+class TestEnums:
+    def test_backend_names(self):
+        assert BackendName.MOCK == "mock"
+        assert BackendName.ANTHROPIC == "anthropic"
+        assert BackendName.OPENAI == "openai"
+        assert BackendName.GEMINI == "gemini"
+    def test_agent_ids(self):
+        assert AgentId.DESIGN == "design"
+        assert AgentId.ENGINEERING == "engineering"
+        assert AgentId.CNC == "cnc"
+        assert AgentId.CAD == "cad"
+    def test_backend_name_is_string(self):
+        assert isinstance(BackendName.MOCK, str)
+        assert BackendName.MOCK in {"mock", "anthropic"}
+class TestAgentResponse:
+    def test_create(self):
+        r = AgentResponse(agent_id="design", agent_name="Design Agent", message="hello", color="#7c3aed", avatar="DA")
+        assert r.agent_id == "design"
+        assert r.code is None
+    def test_with_code(self):
+        r = AgentResponse(agent_id="cad", agent_name="CAD", message="done", color="#ffab40", avatar="CC", code="result = cq.Workplane().box(10,10,10)")
+        assert r.code is not None
+    def test_to_dict(self):
+        r = AgentResponse(agent_id="design", agent_name="Design Agent", message="hi", color="#7c3aed", avatar="DA")
+        d = r.to_dict()
+        assert d["agent_id"] == "design"
+        assert d["message"] == "hi"
+        assert "code" in d
+class TestChatResult:
+    def test_create_empty(self):
+        result = ChatResult(responses=[])
+        assert result.preview is None
+        assert result.design_state == {}
+    def test_to_dict(self):
+        r = AgentResponse(agent_id="design", agent_name="D", message="hi", color="#fff", avatar="D")
+        result = ChatResult(responses=[r])
+        d = result.to_dict()
+        assert len(d["responses"]) == 1
+        assert d["preview"] is None
+class TestLLMBackendABC:
+    def test_cannot_instantiate(self):
+        import pytest
+        with pytest.raises(TypeError):
+            LLMBackend()
+    def test_subclass_must_implement_generate(self):
+        class Incomplete(LLMBackend):
+            pass
+        import pytest
+        with pytest.raises(TypeError):
+            Incomplete()
+    def test_subclass_with_generate(self):
+        class Complete(LLMBackend):
+            def generate(self, messages):
+                return "ok"
+        b = Complete()
+        assert b.generate([]) == "ok"
+```
+- [ ] **Step 2: Run tests to verify failure**
+Run: `python -m pytest tests/test_types.py -v`
+Expected: FAIL (`core/types.py` doesn't exist yet)
+- [ ] **Step 3: Create core/types.py**
+```python
+"""Shared types, enums, dataclasses, and ABCs for NeuralCAD."""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field
+from enum import Enum
+from pathlib import Path
+from typing import Optional
+class BackendName(str, Enum):
+    MOCK = "mock"
+    ANTHROPIC = "anthropic"
+    OPENAI = "openai"
+    GEMINI = "gemini"
+class AgentId(str, Enum):
+    DESIGN = "design"
+    ENGINEERING = "engineering"
+    CNC = "cnc"
+    CAD = "cad"
+@dataclass
+class AgentResponse:
+    """A single agent's response in a chat turn."""
+    agent_id: str
+    agent_name: str
+    message: str
+    color: str
+    avatar: str
+    code: Optional[str] = None
+    def to_dict(self) -> dict:
+        return {
+            "agent_id": self.agent_id,
+            "agent_name": self.agent_name,
+            "message": self.message,
+            "color": self.color,
+            "avatar": self.avatar,
+            "code": self.code,
+        }
+@dataclass
+class ChatResult:
+    """Result of a multi-agent chat turn."""
+    responses: list[AgentResponse]
+    preview: Optional[dict] = None
+    design_state: dict = field(default_factory=dict)
+    def to_dict(self) -> dict:
+        return {
+            "responses": [r.to_dict() for r in self.responses],
+            "preview": self.preview,
+            "design_state": self.design_state,
+        }
+class LLMBackend(ABC):
+    """Abstract base class for LLM code generation backends."""
+    @abstractmethod
+    def generate(self, messages: list[dict]) -> str:
+        """Generate text from a list of messages."""
+        ...
+    def generate_with_image(self, messages: list[dict], image_path: str | Path) -> str:
+        """Generate text from messages that include an image."""
+        raise NotImplementedError(
+            f"{type(self).__name__} does not support image input"
+        )
+    @staticmethod
+    def split_system_message(messages: list[dict]) -> tuple[str, list[dict]]:
+        """Extract system message from a message list. Returns (system_text, remaining_messages)."""
+        system_msg = ""
+        user_messages = []
+        for m in messages:
+            if m["role"] == "system":
+                system_msg = m["content"]
+            else:
+                user_messages.append(m)
+        return system_msg, user_messages
+```
+- [ ] **Step 4: Run tests**
+Run: `python -m pytest tests/test_types.py -v`
+Expected: All PASS
+- [ ] **Step 5: Commit**
+```bash
+git add core/types.py tests/test_types.py
+git commit -m "feat: add type system — enums, dataclasses, LLMBackend ABC"
+```
+---
+### Task 5: Create Serializers
+**Files:**
+- Create: `core/serializers.py`
+- Test: `tests/test_serializers.py`
+- [ ] **Step 1: Write tests**
+```python
+"""Tests for core/serializers.py."""
+import pytest
+from core.serializers import ExecutionResultSerializer, ValidationResultSerializer
+class TestExecutionResultSerializer:
+    def test_success(self):
+        """Test with a mock-like ExecutionResult."""
+        class FakeResult:
+            success = True
+            volume = 6000.0
+            bounding_box = (10.0, 20.0, 30.0)
+            face_count = 6
+            edge_count = 12
+            error = None
+        d = ExecutionResultSerializer.to_dict(FakeResult())
+        assert d["success"] is True
+        assert d["volume_mm3"] == 6000.0
+        assert d["bounding_box_mm"] == [10.0, 20.0, 30.0]
+        assert d["face_count"] == 6
+        assert d["error"] is None
+    def test_failure(self):
+        class FakeResult:
+            success = False
+            volume = 0.0
+            bounding_box = ()
+            face_count = 0
+            edge_count = 0
+            error = "syntax error"
+        d = ExecutionResultSerializer.to_dict(FakeResult())
+        assert d["success"] is False
+        assert d["error"] == "syntax error"
+        assert d["bounding_box_mm"] == []
+class TestValidationResultSerializer:
+    def test_basic(self):
+        class FakeIssue:
+            severity = "warning"
+            category = "Size"
+            message = "Part is large"
+        class FakeResult:
+            machinable = True
+            axis_recommendation = "3-axis"
+            error_count = 0
+            warning_count = 1
+            issues = [FakeIssue()]
+        d = ValidationResultSerializer.to_dict(FakeResult())
+        assert d["machinable"] is True
+        assert d["axis_recommendation"] == "3-axis"
+        assert len(d["issues"]) == 1
+        assert d["issues"][0]["severity"] == "warning"
+    def test_empty_issues(self):
+        class FakeResult:
+            machinable = True
+            axis_recommendation = "3-axis"
+            error_count = 0
+            warning_count = 0
+            issues = []
+        d = ValidationResultSerializer.to_dict(FakeResult())
+        assert d["issues"] == []
+```
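+- [ ] **Step 2: Run tests to verify failure**
+Run: `python -m pytest tests/test_serializers.py -v`
+Expected: FAIL (`core/serializers.py` doesn't exist yet)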
+- [ ] **Step 3: Create core/serializers.py**
+```python
+"""Serializers for execution and validation results.
+Eliminates duplicated dict-building code across mcp.py and orchestrator.py.
+"""
+from __future__ import annotations
+class ExecutionResultSerializer:
+    """Serialize ExecutionResult to JSON-ready dict."""
+    @staticmethod
+    def to_dict(result) -> dict:
+        return {
+            "success": result.success,
+            "volume_mm3": result.volume,
+            "bounding_box_mm": list(result.bounding_box) if result.bounding_box else [],
+            "face_count": result.face_count,
+            "edge_count": result.edge_count,
+            "error": result.error,
+        }
+class ValidationResultSerializer:
+    """Serialize CNCValidationResult to JSON-ready dict."""
+    @staticmethod
+    def to_dict(result) -> dict:
+        return {
+            "machinable": result.machinable,
+            "axis_recommendation": result.axis_recommendation,
+            "error_count": result.error_count,
+            "warning_count": result.warning_count,
+            "issues": [
+                {"severity": i.severity, "category": i.category, "message": i.message}
+                for i in result.issues
+            ],
+        }
+```
+- [ ] **Step 4: Run tests**
+Run: `python -m pytest tests/test_serializers.py -v`
+Expected: All PASS
+- [ ] **Step 5: Commit**
+```bash
+git add core/serializers.py tests/test_serializers.py
+git commit -m "feat: add serializers for execution and validation results"
+```
+---
+### Task 6: Create Backend Factory
+**Files:**
+- Create: `core/backend_factory.py`
+- Test: `tests/test_backend_factory.py`
+- [ ] **Step 1: Write tests**
+```python
+"""Tests for core/backend_factory.py."""
+import pytest
+from core.backend_factory import BackendFactory
+from core.types import LLMBackend
+class TestBackendFactory:
+    def test_create_mock(self):
+        backend = BackendFactory.create("mock")
+        assert isinstance(backend, LLMBackend)
+    def test_create_unknown_raises(self):
+        with pytest.raises(ValueError, match="Unknown backend"):
+            BackendFactory.create("nonexistent")
+    def test_registry_has_mock(self):
+        assert "mock" in BackendFactory._registry
+    def test_mock_can_generate(self):
+        backend = BackendFactory.create("mock")
+        result = backend.generate([{"role": "user", "content": "a 50mm cube"}])
+        assert isinstance(result, str)
+        assert "result" in result
+```
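+- [ ] **Step 2: Run tests to verify failure**
+Run: `python -m pytest tests/test_backend_factory.py -v`
+Expected: FAIL (`core/backend_factory.py` doesn't exist yet)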
+- [ ] **Step 3: Create core/backend_factory.py**
+```python
+"""Backend factory — centralized creation of LLM backends.
+Replaces scattered if/elif backend selection across mcp.py, routes.py, web.py.
+"""
+from __future__ import annotations
+import logging
+from typing import TYPE_CHECKING
+from core.types import LLMBackend
+if TYPE_CHECKING:
+    from config.settings import Settings
+logger = logging.getLogger(__name__)
+class BackendFactory:
+    """Registry and factory for LLM backends."""
+    _registry: dict[str, type[LLMBackend]] = {}
+    @classmethod
+    def register(cls, name: str, backend_cls: type[LLMBackend]) -> None:
+        cls._registry[name] = backend_cls
+    @classmethod
+    def create(cls, name: str, **kwargs) -> LLMBackend:
+        if name not in cls._registry:
+            raise ValueError(f"Unknown backend: {name!r}. Available: {list(cls._registry.keys())}")
+        return cls._registry[name](**kwargs)
+    @classmethod
+    def create_safe(cls, name: str, **kwargs) -> LLMBackend:
+        """Create backend, falling back to mock on failure."""
+        try:
+            return cls.create(name, **kwargs)
+        except Exception as exc:
+            logger.warning("Backend %r unavailable (%s), falling back to mock", name, exc)
+            return cls.create("mock")
+def _register_defaults() -> None:
+    """Register all built-in backends. Called at module load."""
+    from core.backends import MockBackend
+    BackendFactory.register("mock", MockBackend)
+    # Lazy-register API backends — they validate keys on __init__
+    try:
+        from core.backends import AnthropicBackend
+        BackendFactory.register("anthropic", AnthropicBackend)
+    except Exception:
+        pass
+    try:
+        from core.backends import OpenAIBackend
+        BackendFactory.register("openai", OpenAIBackend)
+    except Exception:
+        pass
+    try:
+        from core.backends import GeminiBackend
+        BackendFactory.register("gemini", GeminiBackend)
+    except Exception:
+        pass
+_register_defaults()
+```
+- [ ] **Step 4: Run tests**
+Run: `python -m pytest tests/test_backend_factory.py -v`
+Expected: All PASS
+- [ ] **Step 5: Commit**
+```bash
+git add core/backend_factory.py tests/test_backend_factory.py
+git commit -m "feat: add BackendFactory registry for centralized backend creation"
+```
+---
+### Task 7: Config-Drive Backends + ABC Inheritance
+**Files:**
+- Modify: `core/backends.py`
+- [ ] **Step 1: Update LLMBackend base and all backends in core/backends.py**
+Changes to make:
+1. Replace `class LLMBackend:` with import from `core.types`: `from core.types import LLMBackend`
+2. Make `AnthropicBackend`, `OpenAIBackend`, `GeminiBackend` read defaults from `settings`:
+   - Model names: `model or settings.model_for.get("anthropic", "claude-sonnet-4-20250514")`
+   - API keys: `api_key or settings.anthropic_api_key or os.environ.get("ANTHROPIC_API_KEY")`
+   - Replace all `max_tokens=4096` with `settings.max_tokens`
+   - Replace all `temperature=0.2` with `settings.temperature`
+3. Use `self.split_system_message(messages)` in Anthropic/Gemini to eliminate duplicated message parsing
+4. `MockBackend` — replace hardcoded `_THREAD_CLEARANCE` dict with `settings.fasteners`
+- [ ] **Step 2: Run existing tests**
+Run: `python -m pytest tests/ -v --tb=short`
+Expected: All 107+ tests still PASS
+- [ ] **Step 3: Commit**
+```bash
+git add core/backends.py
+git commit -m "refactor: config-drive all backends, use LLMBackend ABC from types"
+```
+---
+### Task 8: Config-Drive Validator
+**Files:**
+- Modify: `core/validator.py`
+- [ ] **Step 1: Replace DEFAULT_CONFIG with settings**
+Replace the hardcoded `DEFAULT_CONFIG` dict with values loaded from settings:
+```python
+from config.settings import settings
+def _get_config(overrides: dict | None = None) -> dict:
+    """Build validation config from settings + optional overrides."""
+    base = dict(settings.validation)
+    if overrides:
+        base.update(overrides)
+    return base
+```
+Update `validate_for_cnc()` to use `_get_config(config)` instead of `{**DEFAULT_CONFIG, **(config or {})}`.
+Also replace magic numbers `100` and `50` for face count thresholds with:
+```python
+complexity = cfg.get("complexity_thresholds", {})
+five_axis_faces = complexity.get("five_axis_faces", 100)
+three_plus_two_faces = complexity.get("three_plus_two_faces", 50)
+```
+- [ ] **Step 2: Run existing validator tests**
+Run: `python -m pytest tests/test_validator.py -v`
+Expected: All PASS
+- [ ] **Step 3: Commit**
+```bash
+git add core/validator.py
+git commit -m "refactor: config-drive CNC validation thresholds"
+```
+---
+### Task 9: Wire Serializers into MCP and Orchestrator
+**Files:**
+- Modify: `server/mcp.py`
+- Modify: `agents/orchestrator.py`
+- [ ] **Step 1: Replace duplicated serialization in server/mcp.py**
+Add import:
+```python
+from core.serializers import ExecutionResultSerializer, ValidationResultSerializer
+```
+Replace all inline `{"volume_mm3": ..., "bounding_box_mm": ..., ...}` blocks (4 occurrences) with:
+```python
+ExecutionResultSerializer.to_dict(result.execution)
+```
+Replace all inline `{"machinable": ..., "axis_recommendation": ..., ...}` blocks (3 occurrences) with:
+```python
+ValidationResultSerializer.to_dict(result.validation)
+```
+- [ ] **Step 2: Replace duplicated serialization in agents/orchestrator.py**
+In `_execute_cad_code()`, replace the inline execution/validation dicts (lines ~110-132) with serializer calls.
+- [ ] **Step 3: Run full test suite**
+Run: `python -m pytest tests/ -v --tb=short`
+Expected: All tests PASS
+- [ ] **Step 4: Commit**
+```bash
+git add server/mcp.py agents/orchestrator.py
+git commit -m "refactor: use serializers to eliminate duplicated result building"
+```
+---
+### Task 10: Final Verification
+- [ ] **Step 1: Run full test suite**
+Run: `python -m pytest tests/ -v --tb=short`
+Expected: All 107 existing tests PASS, plus the 29 new tests added in Tasks 3–6
+- [ ] **Step 2: Verify config loading works**
+Run: `python -c "from config.settings import settings; print(settings.agents.keys()); print(settings.model_for)"`
+Expected: Prints agent IDs and model names from config.yaml.
+- [ ] **Step 3: Verify backend factory works**
+Run: `python -c "from core.backend_factory import BackendFactory; b = BackendFactory.create('mock'); print(b.generate([{'role':'user','content':'a cube'}])[:50])"`
+Expected: Prints first 50 chars of generated CadQuery code.

docs/superpowers/specs/2026-04-11-oop-config-crewai-design.md ADDED Viewed

	@@ -0,0 +1,1232 @@

+# NeuralCAD: OOP Refactor, Config Externalization, and CrewAI Overhaul
+## Context
+NeuralCAD has a working multi-agent CAD design system with 107 tests passing. However, the codebase has systemic issues:
+- **Hardcoded values everywhere**: model names, ports, thresholds, agent definitions, routing keywords, prompt templates all live in Python source code
+- **No OOP discipline**: no ABCs, no interfaces, no factory patterns, anemic domain models, god classes
+- **CrewAI at 8% utilization**: used as a sequential for-loop wrapper; memory, tools, delegation, hierarchical process, structured output, knowledge bases all unused
+- **Duplicated logic**: OUTPUT_DIR defined 3x, serialization code 4x, backend name checks scattered across 4+ files, message parsing duplicated per backend
+This refactor cleans up the architecture without changing user-facing behavior. All 107 existing tests must continue to pass.
+## Goals
+1. Every magic number, model name, threshold, and agent definition moves to `config.yaml` or `.env`
+2. Proper ABCs, factory patterns, and type safety throughout
+3. CrewAI used as intended: hierarchical process, tools, memory, structured output
+4. Eliminate all code duplication
+5. Split god classes/functions into focused units
+---
+## 1. Configuration System
+### 1.1 `.env` — Secrets and Environment Overrides
+```
+# API keys (secrets — never commit)
+ANTHROPIC_API_KEY=sk-ant-...
+OPENAI_API_KEY=sk-...
+GOOGLE_API_KEY=...
+# Environment overrides (optional — config.yaml has defaults)
+NEURALCAD_OUTPUT_DIR=./output
+NEURALCAD_WEB_PORT=5000
+NEURALCAD_MCP_PORT=8000
+```
+### 1.2 `config.yaml` — All Application Configuration
+```yaml
+server:
+  web_port: 5000
+  mcp_port: 8000
+  mcp_name: text-to-cnc
+  cors_origins: ["*"]
+  mcp_startup_wait_seconds: 2
+paths:
+  output_dir: ./output
+  web_dir: ./web
+  prompts_dir: ./agents/prompts
+backends:
+  default: mock
+  models:
+    anthropic: claude-sonnet-4-20250514
+    openai: gpt-4o
+    gemini: gemini-2.5-flash
+  max_tokens: 4096
+  temperature: 0.2
+orchestration:
+  max_history: 30
+  max_active_agents: 3
+  max_retries: 2
+  max_decisions: 20
+  max_recent_decisions: 5
+  part_name_max_chars: 40
+validation:
+  min_wall_thickness_mm: 1.5
+  min_fillet_radius_mm: 1.0
+  max_pocket_depth_ratio: 4.0
+  max_part_size_mm: 500.0
+  min_part_size_mm: 1.0
+  min_hole_diameter_mm: 1.0
+  complexity_thresholds:
+    five_axis_faces: 100
+    three_plus_two_faces: 50
+export:
+  stl_tolerance: 0.01
+agents:
+  design:
+    name: Design Agent
+    role: Industrial Designer
+    color: "#7c3aed"
+    avatar: DA
+    goal: >
+      Understand the user's intent and propose optimal form factors,
+      shapes, and aesthetic choices for mechanical parts.
+    backstory: >
+      You are an experienced industrial designer specializing in mechanical
+      parts. You think about form, function, ergonomics, and visual appeal.
+      You ask clarifying questions about the part's purpose, environment,
+      and constraints before proposing designs.
+  engineering:
+    name: Engineering Agent
+    role: Mechanical Engineer
+    color: "#00b4d8"
+    avatar: EA
+    goal: >
+      Ensure parts are structurally sound with correct dimensions,
+      tolerances, materials, and fastener specifications.
+    backstory: >
+      You are a senior mechanical engineer with deep knowledge of materials
+      science, stress analysis, and fastener standards. You specify wall
+      thicknesses, fillet radii, clearance holes, and material recommendations.
+  cnc:
+    name: CNC Agent
+    role: CNC Manufacturing Advisor
+    color: "#00e676"
+    avatar: CA
+    goal: >
+      Advise on manufacturability: tool access, wall thickness limits,
+      pocket ratios, axis requirements, and cost implications.
+    backstory: >
+      You are a CNC machinist with 20 years of shop floor experience.
+      You know what tool geometries can reach, what aspect ratios cause
+      chatter, and when to recommend 3-axis vs 3+2 vs 5-axis.
+  cad:
+    name: CAD Coder
+    role: CadQuery Code Generator
+    color: "#ffab40"
+    avatar: CC
+    goal: >
+      Generate valid CadQuery Python code that produces the agreed-upon 3D model.
+    backstory: >
+      You are an expert CadQuery programmer. You take the design specifications
+      agreed upon by the team and translate them into precise CadQuery Python
+      code. Your code always assigns the result to a variable called `result`
+      as a cq.Workplane object.
+routing:
+  cad_trigger_keywords:
+    - generate
+    - build
+    - build it
+    - preview
+    - show me
+    - create
+    - create the model
+    - model it
+    - render
+    - code
+    - make it
+    - produce
+  keywords:
+    design:
+      - design
+      - look
+      - shape
+      - style
+      - form
+      - aesthetic
+      - appearance
+      - layout
+      - concept
+      - idea
+      - propose
+      - suggest
+      - bracket
+      - mount
+      - enclosure
+      - housing
+      - ergonomic
+      - profile
+      - contour
+    engineering:
+      - dimension
+      - tolerance
+      - material
+      - strength
+      - load
+      - stress
+      - thickness
+      - wall
+      - fillet
+      - radius
+      - clearance
+      - m2
+      - m3
+      - m4
+      - m5
+      - m6
+      - m8
+      - m10
+      - m12
+      - aluminum
+      - steel
+      - brass
+      - titanium
+      - nylon
+      - gear
+      - bearing
+      - flange
+      - heatsink
+      - fin
+      - rib
+      - bolt
+      - screw
+      - thread
+      - torque
+      - deflection
+      - hole
+      - bore
+      - shaft
+      - keyway
+      - spline
+    cnc:
+      - machine
+      - mill
+      - cnc
+      - manufacture
+      - machinable
+      - axis
+      - tool
+      - fixture
+      - setup
+      - pocket
+      - undercut
+      - access
+      - 3-axis
+      - 5-axis
+      - cost
+      - surface finish
+      - roughness
+      - endmill
+      - drill
+      - tap
+      - chamfer tool
+      - deburr
+      - setup count
+      - cycle time
+      - tolerance class
+materials:
+  - aluminum
+  - aluminium
+  - steel
+  - stainless steel
+  - brass
+  - copper
+  - titanium
+  - nylon
+  - delrin
+  - acetal
+  - abs
+  - polycarbonate
+  - peek
+material_grades:
+  "6061": aluminum 6061
+  "7075": aluminum 7075
+  "304": stainless steel 304
+  "316": stainless steel 316
+  t6: aluminum 6061-T6
+dimension_contexts:
+  wide: width
+  width: width
+  tall: height
+  height: height
+  high: height
+  thick: thickness
+  thickness: thickness
+  deep: depth
+  depth: depth
+  long: length
+  length: length
+  diameter: diameter
+  dia: diameter
+  radius: radius
+  arm: arm_length
+fasteners:
+  M2: 2.4
+  M3: 3.4
+  M4: 4.5
+  M5: 5.5
+  M6: 6.6
+  M8: 9.0
+  M10: 11.0
+  M12: 13.5
+fallback_messages:
+  design: "I'd love to help shape this design. Could you describe the part's purpose and any size constraints?"
+  engineering: "I can help with the structural details. What material and load conditions are we working with?"
+  cnc: "I'll check manufacturability once we have more design details. Any machining preferences (3-axis, 5-axis)?"
+  cad: "I'm ready to generate the model once the design is agreed upon. Say 'preview' when you're ready."
+```
+### 1.3 `config/settings.py` — Pydantic Settings Class
+```python
+from pydantic_settings import BaseSettings
+from pydantic import Field
+from pathlib import Path
+import yaml
+class Settings(BaseSettings):
+    """Single source of truth for all NeuralCAD configuration."""
+    # .env secrets
+    anthropic_api_key: str = ""
+    openai_api_key: str = ""
+    google_api_key: str = ""
+    # Loaded from config.yaml (see model_post_init)
+    server: dict = Field(default_factory=dict)
+    paths: dict = Field(default_factory=dict)
+    backends: dict = Field(default_factory=dict)
+    orchestration: dict = Field(default_factory=dict)
+    validation: dict = Field(default_factory=dict)
+    agents: dict = Field(default_factory=dict)
+    routing: dict = Field(default_factory=dict)
+    materials: list = Field(default_factory=list)
+    material_grades: dict = Field(default_factory=dict)
+    dimension_contexts: dict = Field(default_factory=dict)
+    fasteners: dict = Field(default_factory=dict)
+    fallback_messages: dict = Field(default_factory=dict)
+    export: dict = Field(default_factory=dict)
+    model_config = {"env_prefix": "NEURALCAD_", "env_file": ".env"}
+    def model_post_init(self, __context):
+        config_path = Path(__file__).parent.parent / "config.yaml"
+        if config_path.exists():
+            with open(config_path) as f:
+                data = yaml.safe_load(f)
+            for key, value in data.items():
+                if hasattr(self, key) and not getattr(self, key):
+                    setattr(self, key, value)
+    # Convenience accessors
+    @property
+    def output_dir(self) -> Path:
+        return Path(self.paths.get("output_dir", "./output"))
+    @property
+    def web_port(self) -> int:
+        return self.server.get("web_port", 5000)
+    @property
+    def mcp_port(self) -> int:
+        return self.server.get("mcp_port", 8000)
+    @property
+    def default_backend(self) -> str:
+        return self.backends.get("default", "mock")
+    @property
+    def model_for(self) -> dict[str, str]:
+        return self.backends.get("models", {})
+    @property
+    def max_tokens(self) -> int:
+        return self.backends.get("max_tokens", 4096)
+    @property
+    def temperature(self) -> float:
+        return self.backends.get("temperature", 0.2)
+# Singleton — import this everywhere
+settings = Settings()
+```
+Every module imports `from config.settings import settings` instead of defining its own constants.
+---
+## 2. Type System and ABCs
+### 2.1 `core/types.py` — Enums, Dataclasses, ABCs
+```python
+from abc import ABC, abstractmethod
+from enum import Enum
+from dataclasses import dataclass, field
+from typing import Optional
+class BackendName(str, Enum):
+    MOCK = "mock"
+    ANTHROPIC = "anthropic"
+    OPENAI = "openai"
+    GEMINI = "gemini"
+class AgentId(str, Enum):
+    DESIGN = "design"
+    ENGINEERING = "engineering"
+    CNC = "cnc"
+    CAD = "cad"
+@dataclass
+class AgentResponse:
+    agent_id: str
+    agent_name: str
+    message: str
+    color: str
+    avatar: str
+    code: Optional[str] = None
+@dataclass
+class ChatResult:
+    responses: list[AgentResponse]
+    preview: Optional[dict] = None
+    design_state: dict = field(default_factory=dict)
+class LLMBackend(ABC):
+    @abstractmethod
+    def generate(self, messages: list[dict]) -> str: ...
+    def generate_with_image(self, messages: list[dict], image_path) -> str:
+        raise NotImplementedError(f"{type(self).__name__} does not support vision")
+```
+### 2.2 `agents/base.py` — Orchestrator ABC
+```python
+class BaseOrchestrator(ABC):
+    def __init__(self, settings: Settings, output_dir: Path):
+        self.settings = settings
+        self.output_dir = output_dir
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+    @abstractmethod
+    def chat_turn(
+        self,
+        message: str,
+        history: list[dict],
+        mentions: list[str] | None = None,
+        design_state: dict | None = None,
+    ) -> ChatResult: ...
+```
+---
+## 3. Backend Refactoring
+### 3.1 Config-Driven Backends
+Each backend reads from `settings` instead of hardcoding:
+```python
+class AnthropicBackend(LLMBackend):
+    def __init__(self, model: str | None = None, api_key: str | None = None):
+        self.model = model or settings.model_for.get("anthropic", "claude-sonnet-4-20250514")
+        self.api_key = api_key or settings.anthropic_api_key
+        if not self.api_key:
+            raise ValueError("AnthropicBackend requires ANTHROPIC_API_KEY")
+        self.client = anthropic.Anthropic(api_key=self.api_key)
+    def generate(self, messages: list[dict]) -> str:
+        system, msgs = self._split_system(messages)
+        response = self.client.messages.create(
+            model=self.model,
+            max_tokens=settings.max_tokens,
+            temperature=settings.temperature,
+            system=system,
+            messages=msgs,
+        )
+        return response.content[0].text
+```
+The same pattern applies to `OpenAIBackend` and `GeminiBackend`. The `_split_system()` helper is extracted into a shared method on the `LLMBackend` base class to eliminate the 4x duplication.
+### 3.2 Backend Factory
+```python
+# core/backend_factory.py
+class BackendFactory:
+    _registry: dict[str, type[LLMBackend]] = {}
+    @classmethod
+    def register(cls, name: str, backend_cls: type[LLMBackend]):
+        cls._registry[name] = backend_cls
+    @classmethod
+    def create(cls, name: str) -> LLMBackend:
+        if name not in cls._registry:
+            raise ValueError(f"Unknown backend: {name}")
+        return cls._registry[name]()
+# Registration at module load
+BackendFactory.register("mock", MockBackend)
+BackendFactory.register("anthropic", AnthropicBackend)
+BackendFactory.register("openai", OpenAIBackend)
+BackendFactory.register("gemini", GeminiBackend)
+```
+This replaces every `if backend == "anthropic": ...` branch scattered across `mcp.py`, `routes.py`, `web.py`, and `orchestrator.py`.
+### 3.3 MockBackend Strategy Pattern
+Split the 204-line `_generate_code()`:
+```python
+# core/mock/generators.py
+class ShapeGenerator(ABC):
+    @abstractmethod
+    def generate(self, params: ParsedPrompt) -> list[str]: ...
+class CylinderGenerator(ShapeGenerator): ...
+class PlateGenerator(ShapeGenerator): ...
+class BoxGenerator(ShapeGenerator): ...
+class LBracketGenerator(ShapeGenerator): ...
+# core/mock/backend.py
+class MockBackend(LLMBackend):
+    _generators = {
+        "cylinder": CylinderGenerator(),
+        "plate": PlateGenerator(),
+        "l_bracket": LBracketGenerator(),
+        "box": BoxGenerator(),
+    }
+    def _generate_code(self, params: ParsedPrompt) -> str:
+        generator = self._generators.get(params.shape, self._generators["box"])
+        lines = generator.generate(params)
+        return "\n".join(lines)
+```
+`ParsedPrompt` is a dataclass produced by `PromptParser` (extracted from `_parse_prompt()`). The thread clearance table is loaded from `settings.fasteners`.
+---
+## 4. Serializers
+### 4.1 `core/serializers.py`
+The serializers eliminate the 4x execution-result and 3x validation-result duplication in `mcp.py` and `orchestrator.py`:
+```python
+class ExecutionResultSerializer:
+    @staticmethod
+    def to_dict(result: ExecutionResult) -> dict:
+        return {
+            "success": result.success,
+            "volume_mm3": result.volume,
+            "bounding_box_mm": list(result.bounding_box) if result.bounding_box else [],
+            "face_count": result.face_count,
+            "edge_count": result.edge_count,
+            "error": result.error,
+        }
+class ValidationResultSerializer:
+    @staticmethod
+    def to_dict(result: CNCValidationResult) -> dict:
+        return {
+            "machinable": result.machinable,
+            "axis_recommendation": result.axis_recommendation,
+            "error_count": result.error_count,
+            "warning_count": result.warning_count,
+            "issues": [
+                {"severity": i.severity, "category": i.category, "message": i.message}
+                for i in result.issues
+            ],
+        }
+```
+---
+## 5. CrewAI Overhaul
+### 5.1 Hierarchical Process with Manager
+Replace `Process.sequential` with `Process.hierarchical`. A manager agent, which CrewAI creates from `manager_llm`, coordinates the crew — it decides which specialists to involve based on the conversation:
+```python
+# agents/crew_orchestrator.py
+crew = Crew(
+    agents=crew_agents,
+    tasks=crew_tasks,
+    process=Process.hierarchical,
+    manager_llm=self._build_llm(),
+    memory=True,
+    verbose=settings.server.get("debug", False),
+)
+```
+### 5.2 Structured Output
+Replace regex parsing with `output_pydantic`:
+```python
+from pydantic import BaseModel
+class AgentOutput(BaseModel):
+    message: str
+    code: str | None = None
+    reasoning: str = ""
+# In task creation:
+task = Task(
+    description=context,
+    expected_output="Structured response with message and optional code",
+    output_pydantic=AgentOutput,
+    agent=agent,
+)
+```
+This eliminates the fragile regex-based code extraction in `_extract_code()`.
+### 5.3 CadQuery Tools for CAD Agent
+```python
+# agents/tools.py
+from crewai.tools import tool
+from core.executor import execute_cadquery
+@tool("Execute CadQuery Code")
+def execute_cad_tool(code: str) -> str:
+    """Execute CadQuery Python code and return geometry info.
+    The code must assign result to a variable called `result` as cq.Workplane.
+    Returns: success status, volume, bounding box, face/edge counts, or error message.
+    """
+    result = execute_cadquery(code)
+    return ExecutionResultSerializer.to_dict(result)
+@tool("Validate CNC Manufacturability")
+def validate_cad_tool(code: str) -> str:
+    """Run CNC manufacturability checks on CadQuery code.
+    Returns: machinable status, axis recommendation, issues list.
+    """
+    exec_result = execute_cadquery(code)
+    if not exec_result.success:
+        return {"error": exec_result.error}
+    validation = validate_for_cnc(exec_result.result)
+    return ValidationResultSerializer.to_dict(validation)
+```
+The CAD agent gets these tools, enabling it to execute code within its reasoning loop and self-correct without the external retry mechanism:
+```python
+cad_agent = Agent(
+    role=agent_cfg["role"],
+    goal=agent_cfg["goal"],
+    backstory=agent_cfg["backstory"],
+    tools=[execute_cad_tool, validate_cad_tool],
+    llm=llm,
+    allow_delegation=False,
+    verbose=debug,
+)
+```
+### 5.4 Complete LLM Adapter
+```python
+class NeuralCADLLMAdapter(BaseLLM):
+    def __init__(self, backend: LLMBackend, model: str = "custom"):
+        super().__init__(model=model)
+        self.backend = backend
+    def call(self, messages, tools=None, callbacks=None, **kwargs) -> str:
+        if isinstance(messages, str):
+            messages = [{"role": "user", "content": messages}]
+        return self.backend.generate(messages)
+    def supports_function_calling(self) -> bool:
+        return True  # Enable tool use
+    def supports_stop_words(self) -> bool:
+        return False
+    def supports_vision(self) -> bool:
+        return hasattr(self.backend, 'generate_with_image')
+```
+Key change: `supports_function_calling` returns `True` so CrewAI can route tool calls through the adapter.
+### 5.5 Memory Integration
+CrewAI's built-in memory replaces manual `DesignState` tracking for the crew orchestrator:
+```python
+crew = Crew(
+    agents=crew_agents,
+    tasks=crew_tasks,
+    process=Process.hierarchical,
+    manager_llm=llm,
+    memory=True,  # Enables short-term + entity memory
+)
+```
+`DesignState` remains for the single-call and mock orchestrators (which don't use CrewAI), but the crew orchestrator delegates state to CrewAI's memory system. The `extract_decisions()` function is still called post-hoc to populate the response's `design_state` field for frontend compatibility.
+### 5.6 Delegation
+Enable delegation for engineering and CNC agents so they can hand off questions to each other:
+```python
+engineering_agent = Agent(
+    ...
+    allow_delegation=True,  # Can ask CNC about manufacturability
+)
+cnc_agent = Agent(
+    ...
+    allow_delegation=True,  # Can ask engineering about specs
+)
+cad_agent = Agent(
+    ...
+    allow_delegation=False,  # Only generates code, no delegation
+)
+```
+### 5.7 Knowledge Base (RAG over CadQuery Docs)
+CrewAI ships with ChromaDB, but NeuralCAD never uses it. Add a knowledge base so agents can look up CadQuery API methods, material properties, and machining guidelines instead of relying on LLM training data alone:
+```python
+# agents/knowledge.py
+from crewai.knowledge.source import TextFileKnowledgeSource
+def build_knowledge_sources(settings: Settings) -> list:
+    """Load knowledge sources from docs/ directory."""
+    sources = []
+    knowledge_dir = Path(settings.paths.get("knowledge_dir", "./docs/knowledge"))
+    if knowledge_dir.exists():
+        for md_file in knowledge_dir.glob("*.md"):
+            sources.append(TextFileKnowledgeSource(file_paths=[str(md_file)]))
+    return sources
+```
+Knowledge files to create:
+| File | Content | Benefits |
+|------|---------|----------|
+| `docs/knowledge/cadquery_api.md` | CadQuery method signatures, parameter docs, common patterns | CAD agent generates correct code on first try |
+| `docs/knowledge/cnc_guidelines.md` | Wall thickness limits, pocket ratios, tool access rules, axis selection criteria | CNC agent gives precise manufacturability advice |
+| `docs/knowledge/materials.md` | Material properties, grades, machinability ratings, cost tiers | Engineering agent makes informed material recommendations |
+| `docs/knowledge/fasteners.md` | ISO clearance/tapping hole sizes, torque specs, strength grades | Engineering agent specifies correct hole sizes |
+Usage in crew creation:
+```python
+from agents.knowledge import build_knowledge_sources
+crew = Crew(
+    agents=crew_agents,
+    tasks=crew_tasks,
+    process=Process.hierarchical,
+    manager_llm=llm,
+    memory=True,
+    knowledge_sources=build_knowledge_sources(settings),
+)
+```
+Config addition to `config.yaml`:
+```yaml
+paths:
+  knowledge_dir: ./docs/knowledge
+```
+### 5.8 Guardrails (Output Validation with Auto-Retry)
+Replace the manual retry loop in `_execute_cad_code()` with CrewAI's built-in guardrail system. When the CAD agent produces invalid code, CrewAI automatically retries with the error feedback:
+```python
+# agents/guardrails.py
+from core.executor import execute_cadquery
+def validate_cad_output(output) -> tuple[bool, str]:
+    """Guardrail: validate CAD agent output contains executable code."""
+    if not hasattr(output, 'code') or not output.code:
+        return (True, "")  # No code in output is OK (conversational response)
+    code = output.code
+    if "result" not in code:
+        return (False, "Code must assign the final solid to a variable called `result` as a cq.Workplane.")
+    # Quick syntax check (don't execute — the tool handles that)
+    try:
+        compile(code, "<cad_output>", "exec")
+    except SyntaxError as e:
+        return (False, f"Syntax error in generated code: {e}")
+    return (True, "")
+```
+Usage in task creation:
+```python
+cad_task = Task(
+    description="Generate CadQuery code based on the agreed design...",
+    expected_output="Valid CadQuery Python code",
+    output_pydantic=AgentOutput,
+    agent=cad_agent,
+    guardrail=validate_cad_output,  # Auto-retries on failure
+    context=[design_task, engineering_task],
+)
+```
+This replaces the manual `while not exec_result.success and retries < max_retries` loop in `orchestrator.py:76-89`. CrewAI handles the retry internally with the error message fed back to the agent.
+### 5.9 Task Context (Agent-to-Agent Data Flow)
+Instead of building an identical context blob for all agents, use CrewAI's task context system so each agent sees the output of previous agents:
+```python
+# Design goes first — gets only user message + history
+design_task = Task(
+    description=f"User says: {message}\n\nPropose a design approach.",
+    expected_output="Design proposal with shape and form factor",
+    output_pydantic=AgentOutput,
+    agent=design_agent,
+)
+# Engineering sees design output
+engineering_task = Task(
+    description=f"User says: {message}\n\nSpecify engineering details.",
+    expected_output="Dimensions, materials, fastener specs",
+    output_pydantic=AgentOutput,
+    agent=engineering_agent,
+    context=[design_task],  # Receives design agent's output
+)
+# CNC sees both design and engineering output
+cnc_task = Task(
+    description=f"User says: {message}\n\nAssess manufacturability.",
+    expected_output="CNC feasibility, axis recommendation, issues",
+    output_pydantic=AgentOutput,
+    agent=cnc_agent,
+    context=[design_task, engineering_task],
+)
+# CAD sees all three — has full agreed spec
+cad_task = Task(
+    description=f"Generate CadQuery code for the agreed design.",
+    expected_output="Valid CadQuery Python code",
+    output_pydantic=AgentOutput,
+    agent=cad_agent,
+    guardrail=validate_cad_output,
+    context=[design_task, engineering_task, cnc_task],
+)
+```
+This replaces `_build_agent_context()` which currently dumps the same blob to everyone. Each agent now builds on the previous agent's actual output rather than reasoning in isolation.
+### 5.10 Callbacks for Observability
+Replace `verbose=False` (which hides all reasoning) with structured callbacks that can be logged and optionally streamed to the frontend:
+```python
+# agents/callbacks.py
+import logging
+logger = logging.getLogger(__name__)
+def on_task_start(task):
+    """Called when an agent starts working on a task."""
+    logger.info("Agent %s starting task: %s", task.agent.role, task.description[:80])
+def on_task_complete(task_output):
+    """Called when an agent finishes a task."""
+    logger.info(
+        "Agent %s completed — output length: %d chars",
+        task_output.agent,
+        len(str(task_output.raw)),
+    )
+def on_step(step_output):
+    """Called on each reasoning step (thought/action/observation)."""
+    logger.debug("Step: %s", str(step_output)[:200])
+```
+Usage:
+```python
+crew = Crew(
+    agents=crew_agents,
+    tasks=crew_tasks,
+    process=Process.hierarchical,
+    manager_llm=llm,
+    memory=True,
+    knowledge_sources=build_knowledge_sources(settings),
+    task_callback=on_task_complete,
+    step_callback=on_step,
+)
+```
+Future enhancement: these callbacks could push events to the frontend via SSE for real-time per-agent typing indicators (e.g., "Engineering Agent is thinking...").
+### 5.11 Planning Mode
+Enable the hierarchical manager to create an execution plan before dispatching tasks:
+```python
+crew = Crew(
+    agents=crew_agents,
+    tasks=crew_tasks,
+    process=Process.hierarchical,
+    manager_llm=llm,
+    planning=True,         # Manager creates plan first
+    planning_llm=llm,      # LLM used for planning step
+    memory=True,
+    knowledge_sources=build_knowledge_sources(settings),
+    task_callback=on_task_complete,
+    step_callback=on_step,
+)
+```
+With planning enabled, the manager agent first analyzes the user's message and creates a step-by-step plan: "1. Ask Design about form factor, 2. Ask Engineering about M6 hole placement, 3. Skip CNC (simple geometry), 4. Have CAD generate code." This is smarter than always running all agents — for simple requests it may only activate 1-2 agents.
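+Config addition to `config.yaml` (note: `Settings` must also declare a `crewai` field, otherwise `model_post_init` skips this section; `knowledge_dir` here duplicates `paths.knowledge_dir`, which is the key `build_knowledge_sources()` actually reads):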
+```yaml
+crewai:
+  planning: true
+  verbose: false
+  memory: true
+  knowledge_dir: ./docs/knowledge
+```
+### 5.12 CrewAI Utilization Summary
+| Feature | Before (8%) | After (~80%) | Impact |
+|---------|-------------|--------------|--------|
+| Process | sequential | hierarchical + planning | Manager coordinates; skips irrelevant agents |
+| Output | Regex parsing | output_pydantic + guardrails | Type-safe; auto-retry on invalid output |
+| Tools | Disabled | execute_cad, validate_cad | CAD agent self-corrects within reasoning |
+| Memory | Manual DesignState only | CrewAI short-term + entity memory | Agents remember context across reasoning steps |
+| Knowledge | None | RAG over CadQuery docs, materials, CNC guidelines | Agents access reference data, not just training data |
+| Context | Identical blob to all | Task context chains (design → engineering → CNC → CAD) | Each agent builds on previous outputs |
+| Delegation | Disabled | Engineering ↔ CNC | Agents consult each other when needed |
+| Observability | verbose=False | Structured callbacks (logging + future SSE) | Debug agent reasoning, surface to frontend |
+| Planning | None | Manager plans before executing | Smarter agent selection per turn |
+| Validation | Manual retry loop | Guardrails with auto-retry | Cleaner code, CrewAI handles retry logic |
+Features deliberately NOT used:
+| Feature | Reason |
+|---------|--------|
+| LongTermMemory (SQLite) | DesignState + localStorage already covers persistence |
+| Flows (multi-crew pipelines) | One crew per turn is sufficient |
+| Training (human feedback) | Requires feedback infrastructure; premature |
+| Telemetry | Sends data to CrewAI servers; privacy concern |
+---
+## 6. DesignState as Proper Domain Model
+Move `extract_decisions()` into `DesignState` as a method:
+```python
+class DesignState(BaseModel):
+    part_name: str = ""
+    description: str = ""
+    material: str = ""
+    dimensions: dict[str, float] = Field(default_factory=dict)
+    features: list[str] = Field(default_factory=list)
+    constraints: list[str] = Field(default_factory=list)
+    decisions: list[str] = Field(default_factory=list)
+    axis_recommendation: str = ""
+    def update_from_messages(
+        self, agent_responses: list[dict], user_message: str = ""
+    ) -> "DesignState":
+        """Extract decisions from agent responses and return updated state."""
+        # All the logic currently in extract_decisions() moves here
+        ...
+    def render(self) -> str:
+        """Render as concise spec block for LLM context."""
+        ...
+```
+The materials list, material grades, and dimension contexts are all loaded from `settings` instead of module-level constants.
+---
+## 7. Routing Engine
+```python
+# agents/routing.py
+class RoutingEngine:
+    def __init__(self, settings: Settings):
+        self.keywords = settings.routing.get("keywords", {})
+        self.cad_triggers = settings.routing.get("cad_trigger_keywords", [])
+        self.max_agents = settings.orchestration.get("max_active_agents", 3)
+    def route(self, message: str) -> list[str]:
+        """Return list of agent IDs that should respond."""
+        lower = message.lower()
+        scores = {agent_id: 0 for agent_id in self.keywords}
+        for agent_id, kws in self.keywords.items():
+            for kw in kws:
+                if kw in lower:
+                    scores[agent_id] += 1
+        active = [aid for aid, score in sorted(scores.items(), key=lambda x: -x[1]) if score > 0]
+        if not active:
+            active = [AgentId.DESIGN, AgentId.ENGINEERING]
+        return active[:self.max_agents]
+    def has_cad_trigger(self, message: str) -> bool:
+        lower = message.lower()
+        return any(kw in lower for kw in self.cad_triggers)
+    def parse_mentions(self, message: str) -> tuple[str, list[str]]:
+        """Extract @mentions from message."""
+        ...
+```
+This replaces the free functions `route_by_keywords()`, `parse_mentions()`, and the scattered `CAD_TRIGGER_KEYWORDS` checks.
+---
+## 8. Prompt System
+Move prompt templates from Python string literals to Jinja2 files:
+### `agents/prompts/orchestrator.j2`
+```jinja2
+You are the orchestrator for a multi-agent CAD design team.
+You control multiple specialist agents who collaborate with a user
+to design mechanical parts for CNC machining.
+## Your Agents
+{% for agent_id in active_agents %}
+### {{ agents[agent_id].name }} (id: "{{ agent_id }}")
+Role: {{ agents[agent_id].role }}
+Goal: {{ agents[agent_id].goal }}
+Personality: {{ agents[agent_id].backstory }}
+{% endfor %}
+## Instructions
+...
+```
+### `agents/prompt_builder.py`
+```python
+class PromptBuilder:
+    def __init__(self, settings: Settings):
+        self.env = jinja2.Environment(
+            loader=jinja2.FileSystemLoader(settings.paths.get("prompts_dir", "./agents/prompts"))
+        )
+        self.agents = settings.agents
+    def build_orchestrator_prompt(self, active_agents: list[str], include_cad: bool = False) -> str:
+        template = self.env.get_template("orchestrator.j2")
+        return template.render(active_agents=active_agents, agents=self.agents, include_cad=include_cad)
+```
+---
+## 9. Server Cleanup
+### 9.1 Single OUTPUT_DIR
+All three files (`web.py`, `mcp.py`, `routes.py`) import from settings:
+```python
+from config.settings import settings
+OUTPUT_DIR = settings.output_dir
+```
+### 9.2 MCPClient Class
+```python
+# server/mcp_client.py
+class MCPClient:
+    def __init__(self, url: str):
+        self.url = url
+    async def call_tool(self, name: str, arguments: dict) -> dict:
+        ...
+    async def read_resource(self, uri: str) -> str:
+        ...
+```
+### 9.3 Pydantic Models for All Endpoints
+Replace `body: dict` in `web.py` with proper Pydantic models:
+```python
+class GenerateRequest(BaseModel):
+    prompt: str
+    backend: str = "mock"
+    part_name: str = ""
+    max_retries: int = 2
+```
+### 9.4 Backend Selection via Factory
+Replace scattered `if backend_name in ("anthropic", "openai", "gemini"):` with:
+```python
+orchestrator = OrchestratorFactory.create(backend_name, settings)
+```
+---
+## 10. File Structure After Refactor
+```
+NeuralCAD/
+├── config.yaml                       # All non-secret config
+├── .env.example                      # Template for secrets
+├── config/
+│   └── settings.py                   # Pydantic Settings singleton
+├── core/
+│   ├── types.py                      # BackendName, AgentId enums; AgentResponse, ChatResult dataclasses; LLMBackend ABC
+│   ├── backends.py                   # Anthropic/OpenAI/Gemini backends (config-driven)
+│   ├── backend_factory.py            # BackendFactory registry
+│   ├── mock/
+│   │   ├── __init__.py
+│   │   ├── backend.py                # MockBackend (thin, delegates to parser + generators)
+│   │   ├── parser.py                 # PromptParser (extracted from _parse_prompt)
+│   │   └── generators.py            # ShapeGenerator ABC + Cylinder/Plate/Box/LBracket
+│   ├── executor.py                   # (cleaned, tolerance from config)
+│   ├── validator.py                  # (thresholds from config)
+│   ├── serializers.py                # ExecutionResultSerializer, ValidationResultSerializer
+│   ├── cadquery_prompts.py           # (unchanged — few-shot examples stay here)
+│   └── pipeline.py                   # (uses factory + config)
+├── agents/
+│   ├── base.py                       # BaseOrchestrator ABC
+│   ├── orchestrator.py               # SingleCallOrchestrator + MockChatBackend (inherit BaseOrchestrator)
+│   ├── crew_orchestrator.py          # CrewOrchestrator: hierarchical, tools, memory, structured output
+│   ├── llm_adapter.py                # Complete NeuralCADLLMAdapter
+│   ├── tools.py                      # @tool decorated execute_cad_tool, validate_cad_tool
+│   ├── design_state.py               # DesignState with update_from_messages() method
+│   ├── routing.py                    # RoutingEngine class
+│   ├── prompt_builder.py             # PromptBuilder (Jinja2)
+│   └── prompts/
+│       ├── orchestrator.j2           # Orchestrator system prompt template
+│       └── agent_persona.j2          # Per-agent persona template
+├── server/
+│   ├── web.py                        # (uses settings, MCPClient, Pydantic models)
+│   ├── routes.py                     # (uses factory, settings)
+│   ├── mcp.py                        # (uses serializers, factory)
+│   └── mcp_client.py                 # MCPClient class
+├── web/
+│   └── index.html                    # (unchanged)
+└── tests/                            # (updated to match new structure)
+```
+### New Files
+| File | Purpose |
+|------|---------|
+| `config.yaml` | All non-secret application configuration |
+| `.env.example` | Template showing required environment variables |
+| `config/settings.py` | Pydantic Settings singleton |
+| `core/types.py` | Enums, dataclasses, ABCs |
+| `core/backend_factory.py` | Backend registry + factory |
+| `core/mock/__init__.py` | Mock package init |
+| `core/mock/backend.py` | Thin MockBackend |
+| `core/mock/parser.py` | PromptParser |
+| `core/mock/generators.py` | Shape strategy classes |
+| `core/serializers.py` | Result serializers |
+| `agents/base.py` | BaseOrchestrator ABC |
+| `agents/tools.py` | CrewAI tool definitions |
+| `agents/routing.py` | RoutingEngine class |
+| `agents/prompt_builder.py` | Jinja2 prompt builder |
+| `agents/prompts/orchestrator.j2` | System prompt template |
+| `agents/prompts/agent_persona.j2` | Agent persona template |
+| `server/mcp_client.py` | MCPClient class |
+### Modified Files
+| File | Changes |
+|------|---------|
+| `core/backends.py` | Remove LLMBackend (moved to types.py), config-drive all backends, extract message helpers to base |
+| `core/executor.py` | STL tolerance from config |
+| `core/validator.py` | All thresholds from config |
+| `core/pipeline.py` | Use BackendFactory, config |
+| `agents/orchestrator.py` | Inherit BaseOrchestrator, use RoutingEngine, PromptBuilder, config |
+| `agents/crew_orchestrator.py` | Hierarchical process, tools, memory, structured output, config |
+| `agents/llm_adapter.py` | Complete interface, enable function calling |
+| `agents/design_state.py` | Move extract_decisions into method, load domain data from config |
+| `agents/definitions.py` | Remove (replaced by config.yaml agents section) |
+| `agents/prompts.py` | Remove (replaced by routing.py + prompt_builder.py) |
+| `server/web.py` | Use settings, MCPClient, Pydantic models |
+| `server/routes.py` | Use settings, factory |
+| `server/mcp.py` | Use serializers, factory, settings |
+| `pyproject.toml` | Add pydantic-settings, jinja2, pyyaml deps |
+### Deleted Files
+| File | Replaced By |
+|------|-------------|
+| `agents/definitions.py` | `config.yaml` agents section + `core/types.py` AgentId enum |
+| `agents/prompts.py` | `agents/routing.py` + `agents/prompt_builder.py` + `agents/prompts/*.j2` |
+---
+## 11. Testing Strategy
+All 107 existing tests must pass after refactor. Changes needed:
+- Tests that import from `agents.definitions` are updated to import from `config.settings`
+- Tests that import `parse_mentions` and `route_by_keywords` from `agents.prompts` are updated to use `RoutingEngine`
+- Tests that import `_format_response` are updated to use the `AgentResponse` dataclass
+- `FakeLLMBackend` in conftest is updated to inherit from the `LLMBackend` ABC
+- New tests are added for: `Settings`, `BackendFactory`, `RoutingEngine`, `PromptBuilder`, serializers, CrewAI tools
+### New Tests
+| File | Tests |
+|------|-------|
+| `tests/test_settings.py` | Config loading, env override, defaults, property accessors |
+| `tests/test_backend_factory.py` | Registration, creation, unknown backend error |
+| `tests/test_routing.py` | RoutingEngine.route(), has_cad_trigger(), parse_mentions() |
+| `tests/test_serializers.py` | ExecutionResultSerializer, ValidationResultSerializer |
+| `tests/test_tools.py` | execute_cad_tool, validate_cad_tool (requires CadQuery) |
+| `tests/test_mock_generators.py` | Each shape generator independently |
+---
+## 12. Migration Strategy
+The refactor is done in phases to keep tests green at each step:
+1. **Config foundation**: Add `config.yaml`, `config/settings.py`, `.env.example`. All existing code still works — settings just aren't used yet.
+2. **Types and ABCs**: Add `core/types.py`, `agents/base.py`. Existing classes don't inherit yet.
+3. **Backend refactor**: Config-drive backends, add factory, add ABC inheritance. Update imports.
+4. **Serializers**: Extract duplicated serialization. Update `mcp.py` and `orchestrator.py`.
+5. **Mock strategy**: Split MockBackend into parser + generators.
+6. **Routing + prompts**: Extract RoutingEngine and PromptBuilder. Delete `agents/prompts.py` and `agents/definitions.py`.
+7. **Orchestrator refactor**: Inherit BaseOrchestrator. Use RoutingEngine, PromptBuilder, config.
+8. **DesignState refactor**: Move extract_decisions into method.
+9. **CrewAI overhaul**: Hierarchical process, tools, memory, structured output, complete adapter.
+10. **Server cleanup**: MCPClient, Pydantic models, settings everywhere.
+11. **Test migration**: Update all test imports, add new tests.
+12. **Final verification**: Full suite green, manual smoke test.

pyproject.toml CHANGED Viewed

@@ -16,6 +16,8 @@ dependencies = [
     "fastapi>=0.110.0",
     "uvicorn>=0.29.0",
     "python-multipart>=0.0.9",
 ]
 [dependency-groups]

     "fastapi>=0.110.0",
     "uvicorn>=0.29.0",
     "python-multipart>=0.0.9",
+    "pydantic-settings>=2.0.0",
+    "pyyaml>=6.0",
 ]
 [dependency-groups]

server/mcp.py CHANGED Viewed

@@ -17,14 +17,14 @@ Usage:
 import json
 import os
-import sys
 from pathlib import Path
 from mcp.server.fastmcp import FastMCP
 from core.cadquery_prompts import build_messages, CADQUERY_SYSTEM_PROMPT
-from core.executor import ExecutionResult, execute_cadquery, export_all, sanitize_code
-from core.validator import validate_for_cnc, CNCValidationResult
 # ── Server Setup ──────────────────────────────────────────────────────────
@@ -117,27 +117,11 @@ def generate_cnc_model(
         "part_name": part_name,
         "retries": result.retry_count,
         "generated_code": result.generated_code,
-        "execution": {
-            "success": result.execution.success,
-            "volume_mm3": result.execution.volume,
-            "bounding_box_mm": list(result.execution.bounding_box) if result.execution.bounding_box else [],
-            "face_count": result.execution.face_count,
-            "edge_count": result.execution.edge_count,
-            "error": result.execution.error,
-        },
     }
     if result.validation:
-        response["validation"] = {
-            "machinable": result.validation.machinable,
-            "axis_recommendation": result.validation.axis_recommendation,
-            "error_count": result.validation.error_count,
-            "warning_count": result.validation.warning_count,
-            "issues": [
-                {"severity": i.severity, "category": i.category, "message": i.message}
-                for i in result.validation.issues
-            ],
-        }
     if result.exported_files:
         response["exported_files"] = {
@@ -189,14 +173,7 @@ def validate_cnc_model(
         }
         validation = validate_for_cnc(exec_result.result, part_name=part_name, config=config)
         response["validation"] = {
-            "machinable": validation.machinable,
-            "axis_recommendation": validation.axis_recommendation,
-            "error_count": validation.error_count,
-            "warning_count": validation.warning_count,
-            "issues": [
-                {"severity": i.severity, "category": i.category, "message": i.message}
-                for i in validation.issues
-            ],
             "summary": validation.summary(),
         }
@@ -231,13 +208,8 @@ def execute_cadquery_code(
     exec_result = execute_cadquery(code)
     response = {
-        "success": exec_result.success,
-        "error": exec_result.error,
         "stdout": exec_result.stdout,
-        "volume_mm3": exec_result.volume,
-        "bounding_box_mm": list(exec_result.bounding_box) if exec_result.bounding_box else [],
-        "face_count": exec_result.face_count,
-        "edge_count": exec_result.edge_count,
     }
     if exec_result.success and export_path:
@@ -358,28 +330,12 @@ def generate_from_image(
         "backend": backend,
         "retries": retry_count,
         "generated_code": generated_code,
-        "execution": {
-            "success": exec_result.success,
-            "volume_mm3": exec_result.volume,
-            "bounding_box_mm": list(exec_result.bounding_box) if exec_result.bounding_box else [],
-            "face_count": exec_result.face_count,
-            "edge_count": exec_result.edge_count,
-            "error": exec_result.error,
-        },
     }
     if exec_result.success:
         validation = validate_for_cnc(exec_result.result, part_name=part_name)
-        response["validation"] = {
-            "machinable": validation.machinable,
-            "axis_recommendation": validation.axis_recommendation,
-            "error_count": validation.error_count,
-            "warning_count": validation.warning_count,
-            "issues": [
-                {"severity": i.severity, "category": i.category, "message": i.message}
-                for i in validation.issues
-            ],
-        }
         base_path = DEFAULT_OUTPUT_DIR / part_name
         try:

 import json
 import os
 from pathlib import Path
 from mcp.server.fastmcp import FastMCP
 from core.cadquery_prompts import build_messages, CADQUERY_SYSTEM_PROMPT
+from core.executor import execute_cadquery, export_all
+from core.serializers import ExecutionResultSerializer, ValidationResultSerializer
+from core.validator import validate_for_cnc
 # ── Server Setup ──────────────────────────────────────────────────────────
         "part_name": part_name,
         "retries": result.retry_count,
         "generated_code": result.generated_code,
+        "execution": ExecutionResultSerializer.to_dict(result.execution),
     }
     if result.validation:
+        response["validation"] = ValidationResultSerializer.to_dict(result.validation)
     if result.exported_files:
         response["exported_files"] = {
         }
         validation = validate_for_cnc(exec_result.result, part_name=part_name, config=config)
         response["validation"] = {
+            **ValidationResultSerializer.to_dict(validation),
             "summary": validation.summary(),
         }
     exec_result = execute_cadquery(code)
     response = {
+        **ExecutionResultSerializer.to_dict(exec_result),
         "stdout": exec_result.stdout,
     }
     if exec_result.success and export_path:
         "backend": backend,
         "retries": retry_count,
         "generated_code": generated_code,
+        "execution": ExecutionResultSerializer.to_dict(exec_result),
     }
     if exec_result.success:
         validation = validate_for_cnc(exec_result.result, part_name=part_name)
+        response["validation"] = ValidationResultSerializer.to_dict(validation)
         base_path = DEFAULT_OUTPUT_DIR / part_name
         try:

tests/test_backend_factory.py ADDED Viewed

	@@ -0,0 +1,29 @@

+"""Tests for core/backend_factory.py."""
+import pytest
+from core.backend_factory import BackendFactory
+from core.types import LLMBackend
+class TestBackendFactory:
+    def test_create_mock(self):
+        backend = BackendFactory.create("mock")
+        # MockBackend inherits from old LLMBackend, not new ABC yet
+        # Just check it has generate method
+        assert hasattr(backend, "generate")
+    def test_create_unknown_raises(self):
+        with pytest.raises(ValueError, match="Unknown backend"):
+            BackendFactory.create("nonexistent")
+    def test_registry_has_mock(self):
+        assert "mock" in BackendFactory._registry
+    def test_mock_can_generate(self):
+        backend = BackendFactory.create("mock")
+        result = backend.generate([{"role": "user", "content": "a 50mm cube"}])
+        assert isinstance(result, str)
+        assert "result" in result
+    def test_create_safe_fallback(self):
+        backend = BackendFactory.create_safe("nonexistent_backend_xyz")
+        assert hasattr(backend, "generate")

tests/test_serializers.py ADDED Viewed

	@@ -0,0 +1,66 @@

+"""Tests for core/serializers.py."""
+from core.serializers import ExecutionResultSerializer, ValidationResultSerializer
+class TestExecutionResultSerializer:
+    def test_success(self):
+        class FakeResult:
+            success = True
+            volume = 6000.0
+            bounding_box = (10.0, 20.0, 30.0)
+            face_count = 6
+            edge_count = 12
+            error = None
+        d = ExecutionResultSerializer.to_dict(FakeResult())
+        assert d["success"] is True
+        assert d["volume_mm3"] == 6000.0
+        assert d["bounding_box_mm"] == [10.0, 20.0, 30.0]
+        assert d["face_count"] == 6
+        assert d["error"] is None
+    def test_failure(self):
+        class FakeResult:
+            success = False
+            volume = 0.0
+            bounding_box = ()
+            face_count = 0
+            edge_count = 0
+            error = "syntax error"
+        d = ExecutionResultSerializer.to_dict(FakeResult())
+        assert d["success"] is False
+        assert d["error"] == "syntax error"
+        assert d["bounding_box_mm"] == []
+class TestValidationResultSerializer:
+    def test_basic(self):
+        class FakeIssue:
+            severity = "warning"
+            category = "Size"
+            message = "Part is large"
+        class FakeResult:
+            machinable = True
+            axis_recommendation = "3-axis"
+            error_count = 0
+            warning_count = 1
+            issues = [FakeIssue()]
+        d = ValidationResultSerializer.to_dict(FakeResult())
+        assert d["machinable"] is True
+        assert d["axis_recommendation"] == "3-axis"
+        assert len(d["issues"]) == 1
+        assert d["issues"][0]["severity"] == "warning"
+    def test_empty_issues(self):
+        class FakeResult:
+            machinable = True
+            axis_recommendation = "3-axis"
+            error_count = 0
+            warning_count = 0
+            issues = []
+        d = ValidationResultSerializer.to_dict(FakeResult())
+        assert d["issues"] == []

tests/test_settings.py ADDED Viewed

	@@ -0,0 +1,50 @@

+"""Tests for config/settings.py."""
+from pathlib import Path
+class TestSettings:
+    def test_loads_config_yaml(self):
+        from config.settings import settings
+        assert settings.agents
+        assert "design" in settings.agents
+    def test_output_dir_property(self):
+        from config.settings import settings
+        assert isinstance(settings.output_dir, Path)
+    def test_web_port_property(self):
+        from config.settings import settings
+        assert isinstance(settings.web_port, int)
+        assert settings.web_port > 0
+    def test_model_for_property(self):
+        from config.settings import settings
+        models = settings.model_for
+        assert "anthropic" in models
+        assert "openai" in models
+        assert "gemini" in models
+    def test_max_tokens_property(self):
+        from config.settings import settings
+        assert settings.max_tokens == 4096
+    def test_temperature_property(self):
+        from config.settings import settings
+        assert settings.temperature == 0.2
+    def test_validation_config(self):
+        from config.settings import settings
+        assert settings.validation["min_wall_thickness_mm"] == 1.5
+    def test_routing_keywords_loaded(self):
+        from config.settings import settings
+        assert "design" in settings.routing["keywords"]
+        assert "engineering" in settings.routing["keywords"]
+    def test_fasteners_loaded(self):
+        from config.settings import settings
+        assert settings.fasteners["M6"] == 6.6
+    def test_materials_loaded(self):
+        from config.settings import settings
+        assert "aluminum" in settings.materials

tests/test_types.py ADDED Viewed

	@@ -0,0 +1,88 @@

+"""Tests for core/types.py — enums, dataclasses, ABC."""
+import pytest
+from core.types import BackendName, AgentId, AgentResponse, ChatResult, LLMBackend
+class TestEnums:
+    def test_backend_names(self):
+        assert BackendName.MOCK == "mock"
+        assert BackendName.ANTHROPIC == "anthropic"
+        assert BackendName.OPENAI == "openai"
+        assert BackendName.GEMINI == "gemini"
+    def test_agent_ids(self):
+        assert AgentId.DESIGN == "design"
+        assert AgentId.ENGINEERING == "engineering"
+        assert AgentId.CNC == "cnc"
+        assert AgentId.CAD == "cad"
+    def test_backend_name_is_string(self):
+        assert isinstance(BackendName.MOCK, str)
+        assert BackendName.MOCK in {"mock", "anthropic"}
+class TestAgentResponse:
+    def test_create(self):
+        r = AgentResponse(agent_id="design", agent_name="Design Agent", message="hello", color="#7c3aed", avatar="DA")
+        assert r.agent_id == "design"
+        assert r.code is None
+    def test_with_code(self):
+        r = AgentResponse(agent_id="cad", agent_name="CAD", message="done", color="#ffab40", avatar="CC", code="result = cq.Workplane().box(10,10,10)")
+        assert r.code is not None
+    def test_to_dict(self):
+        r = AgentResponse(agent_id="design", agent_name="Design Agent", message="hi", color="#7c3aed", avatar="DA")
+        d = r.to_dict()
+        assert d["agent_id"] == "design"
+        assert d["message"] == "hi"
+        assert "code" in d
+class TestChatResult:
+    def test_create_empty(self):
+        result = ChatResult(responses=[])
+        assert result.preview is None
+        assert result.design_state == {}
+    def test_to_dict(self):
+        r = AgentResponse(agent_id="design", agent_name="D", message="hi", color="#fff", avatar="D")
+        result = ChatResult(responses=[r])
+        d = result.to_dict()
+        assert len(d["responses"]) == 1
+        assert d["preview"] is None
+class TestLLMBackendABC:
+    def test_cannot_instantiate(self):
+        with pytest.raises(TypeError):
+            LLMBackend()
+    def test_subclass_must_implement_generate(self):
+        class Incomplete(LLMBackend):
+            pass
+        with pytest.raises(TypeError):
+            Incomplete()
+    def test_subclass_with_generate(self):
+        class Complete(LLMBackend):
+            def generate(self, messages):
+                return "ok"
+        b = Complete()
+        assert b.generate([]) == "ok"
+    def test_split_system_message(self):
+        msgs = [
+            {"role": "system", "content": "You are a bot"},
+            {"role": "user", "content": "hello"},
+        ]
+        system, rest = LLMBackend.split_system_message(msgs)
+        assert system == "You are a bot"
+        assert len(rest) == 1
+        assert rest[0]["role"] == "user"
+    def test_split_system_message_no_system(self):
+        msgs = [{"role": "user", "content": "hello"}]
+        system, rest = LLMBackend.split_system_message(msgs)
+        assert system == ""
+        assert len(rest) == 1

uv.lock CHANGED Viewed

@@ -2552,7 +2552,9 @@ dependencies = [
     { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
     { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
     { name = "openai" },
     { name = "python-multipart" },
     { name = "trimesh" },
     { name = "uvicorn" },
 ]
@@ -2574,7 +2576,9 @@ requires-dist = [
     { name = "mcp", specifier = ">=1.0.0" },
     { name = "numpy", specifier = ">=1.24.0" },
     { name = "openai", specifier = ">=1.30.0" },
     { name = "python-multipart", specifier = ">=0.0.9" },
     { name = "trimesh", specifier = ">=4.0.0" },
     { name = "uvicorn", specifier = ">=0.29.0" },
 ]

     { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
     { name = "numpy", version = "2.4.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
     { name = "openai" },
+    { name = "pydantic-settings" },
     { name = "python-multipart" },
+    { name = "pyyaml" },
     { name = "trimesh" },
     { name = "uvicorn" },
 ]
     { name = "mcp", specifier = ">=1.0.0" },
     { name = "numpy", specifier = ">=1.24.0" },
     { name = "openai", specifier = ">=1.30.0" },
+    { name = "pydantic-settings", specifier = ">=2.0.0" },
     { name = "python-multipart", specifier = ">=0.0.9" },
+    { name = "pyyaml", specifier = ">=6.0" },
     { name = "trimesh", specifier = ">=4.0.0" },
     { name = "uvicorn", specifier = ">=0.29.0" },
 ]