Spaces:

CallMeDaniel
/

neuralcad

Sleeping

App Files Files Community

neuralcad / tests /test_agent_flow.py

CallMeDaniel

feat: replace keyword gap analyzer with LLM-based analyze_gaps

2919322 about 1 month ago

raw

history blame contribute delete

23 kB

	"""Tests for agents/agent_flow.py — models, routing, readiness, and collection helpers."""

	from __future__ import annotations

	from unittest.mock import patch, MagicMock

	import pytest

	from agents.agent_flow import (
	ADVISOR_IDS,
	GENERATOR_IDS,
	AgentDispatchFlow,
	AgentFlowState,
	AgentResponse,
	ChatTurnResponse,
	PreviewData,
	check_readiness,
	collect_responses,
	extract_code,
	route_agents,
	)
	from agents.design_state import DesignState
	from agents.gap_analyzer import GeneratedQuestionCard


	# ── Helpers ───────────────────────────────────────────────────────────────────


	def _make_response(
	    agent_id: str = "design",
	    message: str = "Looks good.",
	    code: str | None = None,
	) -> AgentResponse:
	    """Build an ``AgentResponse`` for tests via ``AgentResponse.from_agent``.

	    Args:
	        agent_id: Agent identifier forwarded to ``from_agent``.
	        message: Message text forwarded to ``from_agent``.
	        code: Optional code payload forwarded to ``from_agent``.

	    Returns:
	        Whatever ``AgentResponse.from_agent(agent_id, message, code)`` returns.
	    """
	    # NOTE: the annotation must read ``str | None``; an escaped ``\|`` is a
	    # markdown artifact and does not parse as Python.
	    return AgentResponse.from_agent(agent_id, message, code)


	# ── TestAgentResponse ─────────────────────────────────────────────────────────


	class TestAgentResponse:
	    """Checks the fields ``AgentResponse.from_agent`` fills in for each agent id."""

	    def test_from_agent_populates_fields(self):
	        built = AgentResponse.from_agent("design", "Hello design world")
	        assert (built.agent_id, built.agent_name) == ("design", "Design Agent")
	        assert built.message == "Hello design world"
	        assert (built.color, built.avatar) == ("#7c3aed", "DA")
	        assert built.code is None

	    def test_from_agent_with_code(self):
	        snippet = "import cadquery as cq\nresult = cq.Workplane('XY').box(10, 10, 10)"
	        built = AgentResponse.from_agent("cad", "Here is the code", code=snippet)
	        assert built.agent_id == "cad"
	        assert built.code == snippet

	    def test_engineering_agent_fields(self):
	        built = AgentResponse.from_agent("engineering", "Wall needs to be 3 mm thick")
	        observed = (built.agent_name, built.color, built.avatar)
	        assert observed == ("Engineering Agent", "#00b4d8", "EA")

	    def test_cnc_agent_fields(self):
	        built = AgentResponse.from_agent("cnc", "3-axis is fine here")
	        observed = (built.agent_name, built.color, built.avatar)
	        assert observed == ("CNC Agent", "#00e676", "CA")

	    def test_cam_agent_fields(self):
	        built = AgentResponse.from_agent("cam", "Roughing then finishing")
	        observed = (built.agent_name, built.color, built.avatar)
	        assert observed == ("CAM Agent", "#ff6b35", "CM")

	    def test_cad_agent_fields(self):
	        built = AgentResponse.from_agent("cad", "Generated model")
	        observed = (built.agent_name, built.color, built.avatar)
	        assert observed == ("CAD Coder", "#ffab40", "CC")

	    def test_model_dump_keys(self):
	        expected_keys = {"agent_id", "agent_name", "message", "color", "avatar", "code"}
	        dumped = AgentResponse.from_agent("design", "A message").model_dump()
	        assert set(dumped) == expected_keys


	# ── TestAgentFlowState ────────────────────────────────────────────────────────


	class TestAgentFlowState:
	    """Checks ``AgentFlowState`` defaults, explicit construction, and list isolation."""

	    def test_defaults(self):
	        fresh = AgentFlowState()
	        # Text fields start out as empty strings.
	        for text_field in ("message", "context", "model_str"):
	            assert getattr(fresh, text_field) == ""
	        # Collection fields start out as empty lists.
	        for list_field in (
	            "mentions",
	            "active_agent_ids",
	            "knowledge_sources_data",
	            "advisor_responses",
	        ):
	            assert getattr(fresh, list_field) == []
	        assert fresh.is_approved_phase is False
	        # Generator outputs are unset until a flow produces them.
	        for optional_field in ("cad_response", "cam_response", "cad_code", "cam_plan"):
	            assert getattr(fresh, optional_field) is None

	    def test_with_inputs(self):
	        advisor_reply = _make_response("engineering", "Solid choice.")
	        supplied = {
	            "message": "Make a bracket",
	            "mentions": ["design", "engineering"],
	            "is_approved_phase": True,
	            "active_agent_ids": ["design", "engineering", "cad"],
	            "advisor_responses": [advisor_reply],
	        }
	        populated = AgentFlowState(**supplied)
	        assert populated.message == "Make a bracket"
	        assert populated.mentions == ["design", "engineering"]
	        assert populated.is_approved_phase is True
	        assert "cad" in populated.active_agent_ids
	        assert len(populated.advisor_responses) == 1

	    def test_independent_default_lists(self):
	        """Appending to one instance's default list must not affect another instance."""
	        first, second = AgentFlowState(), AgentFlowState()
	        first.mentions.append("design")
	        assert second.mentions == []


	# ── TestExtractCode ───────────────────────────────────────────────────────────


	class TestExtractCode:
	    """Covers ``extract_code`` on fenced blocks, bare CadQuery snippets, and plain prose."""

	    def test_fenced_python_block(self):
	        reply = "Here is the code:\n```python\nimport cadquery as cq\nresult = cq.Workplane('XY').box(1,1,1)\n```\nDone."
	        extracted = extract_code(reply)
	        assert extracted is not None
	        assert "import cadquery" in extracted
	        assert "```" not in extracted

	    def test_generic_fence_no_language(self):
	        extracted = extract_code("```\ncq.Workplane('XY').box(5,5,5)\n```")
	        assert extracted is not None
	        assert "cq.Workplane" in extracted

	    def test_unfenced_cq_code_import_marker(self):
	        snippet = "import cadquery as cq\nresult = cq.Workplane('XY').box(2,2,2)"
	        assert extract_code(snippet) == snippet.strip()

	    def test_unfenced_cq_dot_marker(self):
	        snippet = "cq.Workplane('XY').box(1,1,1)"
	        assert extract_code(snippet) == snippet.strip()

	    def test_unfenced_result_marker(self):
	        snippet = "result = cq.Workplane('XY').box(3,3,3)"
	        assert extract_code(snippet) == snippet.strip()

	    def test_plain_text_returns_none(self):
	        prose = "I need more information about the dimensions before I can proceed."
	        assert extract_code(prose) is None

	    def test_empty_string_returns_none(self):
	        extracted = extract_code("")
	        assert extracted is None

	    def test_strips_whitespace_from_fenced_block(self):
	        padded = "```python\n\n result = 1\n\n```"
	        assert extract_code(padded) == "result = 1"


	# ── TestRouteAgents ───────────────────────────────────────────────────────────


	class TestRouteAgents:
	    """Covers ``route_agents``: approved phase, explicit mentions, keywords, and the CAD trigger."""

	    def test_approved_phase_returns_config_agents(self):
	        # config.yaml: approved_agents: ["cad", "cnc"]
	        routed = route_agents("anything", [], is_approved_phase=True)
	        assert set(routed) == {"cad", "cnc"}

	    def test_mentions_override_keywords(self):
	        # The message carries design/engineering keywords, but only "cnc" is mentioned.
	        text = "design shape and engineering material"
	        routed = route_agents(text, mentions=["cnc"], is_approved_phase=False)
	        assert routed == ["cnc"]

	    def test_design_keyword(self):
	        assert "design" in route_agents("I want a nice shape and design", [], False)

	    def test_engineering_keyword(self):
	        routed = route_agents("what material and wall thickness should I use?", [], False)
	        assert "engineering" in routed

	    def test_cnc_keyword(self):
	        routed = route_agents("can we machine this on a 5-axis cnc mill?", [], False)
	        assert "cnc" in routed

	    def test_cam_keyword(self):
	        assert "cam" in route_agents("I need a toolpath and gcode output", [], False)

	    def test_default_no_keyword_match(self):
	        routed = route_agents("hello there", [], False)
	        assert all(agent in routed for agent in ("design", "engineering"))

	    def test_max_agents_capped_at_3(self):
	        # Keywords for design, engineering, cnc and cam are all present; the cap is 3.
	        text = "design the shape, check the material thickness, machine it on a cnc, toolpath and gcode"
	        routed = route_agents(text, [], False)
	        # "cad" is excluded from the count because it can be appended separately
	        # by the trigger path rather than by keyword scoring.
	        keyword_routed = sum(1 for agent in routed if agent != "cad")
	        assert keyword_routed <= 3

	    def test_cad_trigger_appended(self):
	        assert "cad" in route_agents("generate the model for me", [], False)

	    def test_no_cad_trigger_absent(self):
	        # No trigger word such as "generate" is present. This only verifies that
	        # routing completes and yields at least one agent; it does not assert
	        # whether "cad" is included.
	        routed = route_agents("what is the wall thickness?", [], False)
	        assert len(routed) >= 1

	    def test_cad_not_duplicated_when_already_in_keywords(self):
	        # A keyword hit plus a trigger word must not list "cad" twice.
	        routed = route_agents("generate a model using cad", [], False)
	        assert routed.count("cad") in (0, 1)

	    def test_approved_phase_ignores_mentions(self):
	        # The approved phase takes precedence over explicit mentions.
	        routed = route_agents("", ["design"], is_approved_phase=True)
	        assert set(routed) == {"cad", "cnc"}


	# ── TestCheckReadiness ────────────────────────────────────────────────────────


	class TestCheckReadiness:
	    """Covers the three ``check_readiness`` outcomes: READY, NOT_READY, SKIP_GENERATION."""

	    def test_ready_when_clean(self):
	        design_reply = _make_response("design", "Looks good, let's proceed")
	        engineering_reply = _make_response("engineering", "Dimensions are confirmed")
	        verdict = check_readiness(
	            [design_reply, engineering_reply],
	            active_agent_ids=["design", "engineering", "cad"],
	        )
	        assert verdict == "READY"

	    def test_not_ready_when_flagged(self):
	        replies = [
	            _make_response("design", "Nice shape"),
	            _make_response("cad", "NOT READY: missing wall thickness and material"),
	        ]
	        assert check_readiness(replies, active_agent_ids=["design", "cad"]) == "NOT_READY"

	    def test_skip_generation_when_no_generators(self):
	        replies = [_make_response("design", "All good")]
	        verdict = check_readiness(replies, active_agent_ids=["design", "engineering"])
	        assert verdict == "SKIP_GENERATION"

	    def test_not_ready_case_insensitive(self):
	        # A lower-case "not ready" prefix must be treated the same as upper-case.
	        replies = [_make_response("cad", "not ready: dimensions missing")]
	        assert check_readiness(replies, active_agent_ids=["cad"]) == "NOT_READY"

	    def test_not_ready_leading_whitespace_stripped(self):
	        replies = [_make_response("cad", " NOT READY: need more info")]
	        assert check_readiness(replies, active_agent_ids=["cad"]) == "NOT_READY"

	    def test_cam_counts_as_generator(self):
	        replies = [_make_response("cnc", "Setup looks fine")]
	        verdict = check_readiness(replies, active_agent_ids=["cnc", "cam"])
	        assert verdict == "READY"

	    def test_empty_responses_with_generators_is_ready(self):
	        assert check_readiness([], active_agent_ids=["cad"]) == "READY"

	    def test_empty_active_ids_skips(self):
	        assert check_readiness([], active_agent_ids=[]) == "SKIP_GENERATION"


	# ── TestCollectResponses ──────────────────────────────────────────────────────


	class TestCollectResponses:
	    """Covers ``collect_responses`` ordering and its handling of absent generator responses."""

	    def test_merges_all(self):
	        advisor_replies = [
	            _make_response("design", "Shape confirmed"),
	            _make_response("engineering", "Material confirmed"),
	        ]
	        cad_reply = _make_response("cad", "Generated model", code="result = ...")
	        cam_reply = _make_response("cam", "Toolpath ready")
	        merged = collect_responses(advisor_replies, cad_reply, cam_reply)
	        # Advisors come first, then cad, then cam.
	        ordered_ids = [item.agent_id for item in merged]
	        assert ordered_ids == ["design", "engineering", "cad", "cam"]

	    def test_handles_none_cad(self):
	        merged = collect_responses(
	            [_make_response("design", "OK")],
	            cad_response=None,
	            cam_response=_make_response("cam", "Plan ready"),
	        )
	        assert len(merged) == 2
	        assert merged[1].agent_id == "cam"

	    def test_handles_none_cam(self):
	        merged = collect_responses(
	            [_make_response("engineering", "OK")],
	            cad_response=_make_response("cad", "Model done"),
	            cam_response=None,
	        )
	        assert len(merged) == 2
	        assert merged[1].agent_id == "cad"

	    def test_handles_both_none(self):
	        merged = collect_responses(
	            [_make_response("cnc", "Ready")],
	            cad_response=None,
	            cam_response=None,
	        )
	        assert len(merged) == 1
	        assert merged[0].agent_id == "cnc"

	    def test_handles_empty_advisors(self):
	        cad_reply = _make_response("cad", "Model done")
	        cam_reply = _make_response("cam", "Plan done")
	        assert len(collect_responses([], cad_reply, cam_reply)) == 2

	    def test_does_not_mutate_input_list(self):
	        advisor_replies = [_make_response("design", "OK")]
	        size_before = len(advisor_replies)
	        collect_responses(advisor_replies, _make_response("cad", "x"), None)
	        assert len(advisor_replies) == size_before

	    def test_all_none_returns_empty(self):
	        assert collect_responses([], None, None) == []


	# ── TestAgentDispatchFlow ────────────────────────────────────────────────────


	class TestAgentDispatchFlow:
	    """Runs ``AgentDispatchFlow.kickoff`` with the crew-running methods patched out."""

	    # Model identifier passed to every flow state in this class.
	    _MODEL = "gemini/gemini-2.5-flash"

	    def test_no_agents_path(self):
	        """A message with no matching agents routes through HAS_ADVISORS (default fallback)."""
	        state = AgentFlowState(message="xyzzy", context="", model_str=self._MODEL)
	        flow = AgentDispatchFlow(initial_state=state)
	        canned_advice = [AgentResponse.from_agent("design", "I can help.")]
	        with patch.object(AgentDispatchFlow, "_run_advisor_crew", return_value=canned_advice):
	            flow.kickoff()
	        assert flow.state.active_agent_ids == ["design", "engineering"]
	        assert len(flow.state.advisor_responses) == 1

	    def test_approved_phase_routes_generators_only(self):
	        state = AgentFlowState(
	            message="build it",
	            context="## APPROVED PLAN",
	            model_str=self._MODEL,
	            is_approved_phase=True,
	        )
	        flow = AgentDispatchFlow(initial_state=state)
	        canned_advice = [AgentResponse.from_agent("cnc", "Looks machinable.")]
	        with patch.object(AgentDispatchFlow, "_run_advisor_crew", return_value=canned_advice):
	            with patch.object(
	                AgentDispatchFlow,
	                "_run_single_agent_crew",
	                return_value="NOT READY: need dims",
	            ):
	                flow.kickoff()
	        assert flow.state.active_agent_ids == ["cad", "cnc"]
	        cad_reply = flow.state.cad_response
	        assert cad_reply is not None
	        assert cad_reply.message.startswith("NOT READY")

	    def test_mentions_override(self):
	        state = AgentFlowState(
	            message="check",
	            context="",
	            model_str=self._MODEL,
	            mentions=["design"],
	        )
	        flow = AgentDispatchFlow(initial_state=state)
	        canned_advice = [AgentResponse.from_agent("design", "Looks good.")]
	        with patch.object(AgentDispatchFlow, "_run_advisor_crew", return_value=canned_advice):
	            flow.kickoff()
	        assert flow.state.active_agent_ids == ["design"]

	    def test_generators_only_path(self):
	        """Mentioning only a generator routes through GENERATORS_ONLY."""
	        state = AgentFlowState(
	            message="generate it",
	            context="",
	            model_str=self._MODEL,
	            mentions=["cad"],
	        )
	        flow = AgentDispatchFlow(initial_state=state)
	        with patch.object(
	            AgentDispatchFlow,
	            "_run_single_agent_crew",
	            return_value="NOT READY: need specs",
	        ):
	            flow.kickoff()
	        assert flow.state.active_agent_ids == ["cad"]
	        assert flow.state.advisor_responses == []
	        assert flow.state.cad_response is not None

	    def test_collect_results_from_advisor_and_cad(self):
	        state = AgentFlowState(
	            message="generate a bracket preview",
	            context="",
	            model_str=self._MODEL,
	        )
	        flow = AgentDispatchFlow(initial_state=state)
	        canned_advice = [AgentResponse.from_agent("design", "L-bracket idea.")]
	        fenced_code = "```python\nimport cadquery as cq\nresult = cq.Workplane('XY').box(10,10,10)\n```"
	        with patch.object(AgentDispatchFlow, "_run_advisor_crew", return_value=canned_advice):
	            with patch.object(AgentDispatchFlow, "_run_single_agent_crew", return_value=fenced_code):
	                flow.kickoff()
	        final_state = flow.state
	        merged = collect_responses(
	            final_state.advisor_responses,
	            final_state.cad_response,
	            final_state.cam_response,
	        )
	        assert len(merged) >= 2  # advisor + cad
	        assert final_state.cad_code is not None


	class TestMemoryHelpers:
	    """Covers ``_recall_for_agent`` and ``_remember_response`` with and without a memory backend."""

	    @staticmethod
	    def _flow_with_memory(message, memory):
	        """Return a flow for *message* whose ``_memory`` attribute is replaced by *memory*."""
	        state = AgentFlowState(message=message, model_str="gemini/gemini-2.5-flash")
	        flow = AgentDispatchFlow(initial_state=state)
	        flow._memory = memory
	        return flow

	    def test_recall_returns_empty_when_no_memory(self):
	        flow = self._flow_with_memory("bracket design", None)
	        assert flow._recall_for_agent("design") == ""

	    def test_recall_formats_matches(self):
	        hit = MagicMock()
	        hit.record.content = "L-bracket with fillets"
	        backend = MagicMock()
	        backend.recall.return_value = [hit]

	        flow = self._flow_with_memory("bracket", backend)
	        recalled = flow._recall_for_agent("design")
	        assert "## Relevant context from prior turns" in recalled
	        assert "L-bracket with fillets" in recalled
	        backend.recall.assert_called_once()

	    def test_recall_returns_empty_when_no_matches(self):
	        backend = MagicMock()
	        backend.recall.return_value = []

	        flow = self._flow_with_memory("bracket", backend)
	        assert flow._recall_for_agent("design") == ""

	    def test_remember_stores_with_scope(self):
	        backend = MagicMock()

	        flow = self._flow_with_memory("test", backend)
	        flow._remember_response("engineering", "Use 3mm walls in aluminum.")
	        backend.remember.assert_called_once_with(
	            "Use 3mm walls in aluminum.",
	            scope="/agent/engineering",
	        )

	    def test_remember_noop_when_no_memory(self):
	        flow = self._flow_with_memory("test", None)
	        # Passing means the call completed without raising.
	        flow._remember_response("design", "test")


	class TestCollaborationFlag:
	    """Checks the ``allow_delegation`` flag on agents built by ``_build_crew_agent``."""

	    @staticmethod
	    def _build_agent(agent_id):
	        """Return the agent ``_build_crew_agent`` builds for *agent_id*.

	        The flow is created with its ``_memory`` attribute set to ``None``.
	        Only the agent half of the returned pair is used by these tests.
	        """
	        flow = AgentDispatchFlow(initial_state=AgentFlowState(
	            message="test",
	            context="",
	            model_str="gemini/gemini-2.5-flash",
	        ))
	        flow._memory = None
	        # Imported inside the helper so the module can be collected
	        # even when these tests are not the ones being run.
	        from crewai import LLM

	        llm = LLM(model="gemini/gemini-2.5-flash", temperature=0.2)
	        agent, _task = flow._build_crew_agent(agent_id, llm)
	        return agent

	    def test_advisors_get_delegation(self):
	        assert self._build_agent("design").allow_delegation is True

	    def test_generators_no_delegation(self):
	        assert self._build_agent("cad").allow_delegation is False


	# ── TestPreviewData ─────────────────────────────────────────────────────────


	class TestPreviewData:
	    """Checks ``PreviewData`` construction and the keys present in its dump."""

	    def test_success_preview(self):
	        fields = {
	            "success": True,
	            "part_name": "bracket",
	            "stl_url": "/api/models/bracket.stl",
	            "step_url": "/api/models/bracket.step",
	            "execution": {"success": True, "volume_mm3": 1000.0},
	            "validation": {"machinable": True, "axis_recommendation": "3-axis"},
	        }
	        preview = PreviewData(**fields)
	        assert preview.success is True
	        assert preview.part_name == "bracket"

	    def test_failure_preview(self):
	        preview = PreviewData(success=False, error="Execution failed")
	        assert preview.success is False
	        assert preview.error == "Execution failed"

	    def test_model_dump(self):
	        dumped = PreviewData(success=True, part_name="gear").model_dump()
	        assert dumped["success"] is True
	        # Fields that were not supplied are dumped as None.
	        assert dumped["cam"] is None
	        assert dumped["gcode_url"] is None


	# ── TestChatTurnResponse ────────────────────────────────────────────────────


	class TestChatTurnResponse:
	    """Checks ``ChatTurnResponse`` defaults, full construction, and its nested dump."""

	    def test_minimal(self):
	        turn = ChatTurnResponse(design_state=DesignState())
	        assert turn.responses == []
	        assert turn.preview is None
	        assert turn.question_cards == []

	    def test_full(self):
	        agent_reply = AgentResponse(
	            agent_id="design",
	            agent_name="D",
	            message="hi",
	            color="#fff",
	            avatar="D",
	        )
	        preview = PreviewData(success=True, part_name="test")
	        design = DesignState(material="aluminum")
	        question = GeneratedQuestionCard(
	            category="material",
	            question="What material?",
	            responsible_agent="engineering",
	            agent_name="Eng",
	            agent_color="#00e676",
	        )
	        turn = ChatTurnResponse(
	            responses=[agent_reply],
	            preview=preview,
	            design_state=design,
	            question_cards=[question],
	        )
	        assert len(turn.responses) == 1
	        assert turn.preview.part_name == "test"
	        assert turn.design_state.material == "aluminum"
	        assert len(turn.question_cards) == 1

	    def test_model_dump_roundtrip(self):
	        design = DesignState(part_name="bracket", material="steel")
	        dumped = ChatTurnResponse(design_state=design).model_dump()
	        assert dumped["design_state"]["part_name"] == "bracket"
	        assert dumped["responses"] == []
	        assert dumped["preview"] is None