Spaces:
Running on Zero
Running on Zero
| """Validated structured-output tests — fully offline, instructor + litellm faked. | |
| No network and no real credentials. Three layers are covered: | |
| * ``build_output_model`` is pure Pydantic: it constrains ``kind`` to the | |
| allowed grant and requires ``text`` (+ extra fields). | |
| * ``LiteLLMProvider.complete_structured`` wraps a faked | |
| ``instructor.from_litellm`` client and reads tokens + cost from the raw | |
| completion, mirroring ``complete``. | |
| * ``ManifestAgent`` takes the structured path when the provider offers | |
| ``complete_structured`` (validated payload, no ``_raw_fallback``) and the | |
| tolerant-parser path with the deterministic stub. | |
| """ | |
| from __future__ import annotations | |
| import sys | |
| import types | |
| from dataclasses import dataclass | |
| import pytest | |
| from pydantic import BaseModel, ValidationError | |
| from src.agents.base import ManifestAgent | |
| from src.core.manifest import AgentManifest | |
| from src.core.projections import StageProjection | |
| from src.core.structured import AgentOutputError, build_output_model | |
| from src.models.litellm_provider import LiteLLMProvider | |
| from src.models.router import ModelRouter | |
| # ── build_output_model (pure Pydantic) ───────────────────────────────────────── | |
class TestBuildOutputModel:
    """Exercise ``build_output_model`` on its own: plain model validation, no provider."""

    def test_accepts_valid_kind(self):
        # Two kinds are granted; constructing with the first must round-trip both fields.
        output_cls = build_output_model(["agent.spoke", "judge.verdict"])
        instance = output_cls(kind="agent.spoke", text="I collect echoes.")
        assert instance.kind == "agent.spoke"
        assert instance.text == "I collect echoes."

    def test_rejects_kind_not_in_allowed(self):
        output_cls = build_output_model(["agent.spoke"])
        with pytest.raises(ValidationError):
            output_cls(kind="not.real", text="oops")

    def test_single_kind_still_constrains(self):
        # A one-element grant is still a constraint, not a free-form string.
        output_cls = build_output_model(["world.observed"])
        accepted = output_cls(kind="world.observed", text="A booth opens.")
        assert accepted.kind == "world.observed"
        with pytest.raises(ValidationError):
            output_cls(kind="judge.verdict", text="x")

    def test_extra_fields_required(self):
        output_cls = build_output_model(["agent.spoke"], ["emotion"])
        complete = output_cls(kind="agent.spoke", text="hi", emotion="puzzled")
        assert complete.emotion == "puzzled"
        with pytest.raises(ValidationError):
            # Same payload minus the extra field: emotion missing.
            output_cls(kind="agent.spoke", text="hi")

    def test_text_required(self):
        output_cls = build_output_model(["agent.spoke"])
        with pytest.raises(ValidationError):
            output_cls(kind="agent.spoke")

    def test_empty_allowed_kinds_raises(self):
        # An empty grant is rejected when the model is built, before any validation.
        with pytest.raises(AgentOutputError):
            build_output_model([])

    def test_is_subclass_of_basemodel(self):
        output_cls = build_output_model(["agent.spoke"])
        assert issubclass(output_cls, BaseModel)
| # ── fake instructor + litellm for the provider ────────────────────────────────── | |
class _FakeUsage:
    """Fixed token counts standing in for a completion's ``usage`` object."""

    prompt_tokens: int = 11
    completion_tokens: int = 7
    # 11 + 7 = 18: the total is kept consistent with its two parts.
    total_tokens: int = prompt_tokens + completion_tokens
class _FakeRawCompletion:
    """Stand-in for the raw completion returned next to the parsed model.

    Carries a ``usage`` object and a ``_hidden_params`` dict. The dict holds a
    ``response_cost`` entry only when *hidden_cost* is supplied.
    """

    def __init__(self, *, hidden_cost: float | None = None) -> None:
        self.usage = _FakeUsage()
        hidden: dict = {}
        if hidden_cost is not None:
            hidden["response_cost"] = hidden_cost
        self._hidden_params = hidden
class _FakeInstructorClient:
    """Fake of the client object handed back by ``instructor.from_litellm``.

    Args:
        hidden_cost: forwarded to the raw completion built on success.
        raise_exc: when not ``None``, raised from ``create_with_completion``.
        record: optional dict that receives the call's keyword arguments plus
            the ``response_model``, so tests can assert on what was passed.
    """

    def __init__(self, *, hidden_cost=None, raise_exc=None, record=None) -> None:
        self._hidden_cost = hidden_cost
        self._raise = raise_exc
        self._record = record

    def create_with_completion(self, *, response_model, **kwargs):
        """Return ``(validated_instance, raw_completion)`` or raise the configured error."""
        sink = self._record
        if sink is not None:
            # Recording happens before the failure check, so a failing call is still captured.
            sink.update(kwargs)
            sink["response_model"] = response_model

        failure = self._raise
        if failure is not None:
            raise failure

        # Instructor returns a validated instance of the requested model; use the
        # first entry of the ``kind`` annotation's ``__args__`` as the kind.
        allowed_kinds = response_model.model_fields["kind"].annotation.__args__
        parsed = response_model(kind=allowed_kinds[0], text="a mossy booth")
        raw = _FakeRawCompletion(hidden_cost=self._hidden_cost)
        return parsed, raw
def _install_fakes(monkeypatch, *, client, from_litellm_kw: dict | None = None) -> None:
    """Register stand-in ``litellm`` and ``instructor`` modules in ``sys.modules``.

    The fake ``instructor.from_litellm`` always returns *client*.

    *from_litellm_kw*, when given, is updated with the keyword arguments passed
    to ``instructor.from_litellm`` (e.g. the chosen ``mode``) so a test can
    assert on them afterwards.
    """

    def _completion(**kw):
        return None

    def _completion_cost(completion_response=None, **_kw):
        return 0.0

    def _from_litellm(completion, **kw):
        if from_litellm_kw is not None:
            from_litellm_kw.update(kw)
        return client

    litellm_stub = types.ModuleType("litellm")
    litellm_stub.completion = _completion
    litellm_stub.completion_cost = _completion_cost

    instructor_stub = types.ModuleType("instructor")
    instructor_stub.from_litellm = _from_litellm
    # Mode is an enum on the real package; a name->value stand-in is enough for the
    # provider's ``getattr(instructor.Mode, structured_mode.upper())`` resolution.
    instructor_stub.Mode = types.SimpleNamespace(
        JSON_SCHEMA="json_schema",
        JSON="json",
        TOOLS="tools",
    )

    # Go through monkeypatch rather than assigning into sys.modules directly.
    for module_name, stub in (("litellm", litellm_stub), ("instructor", instructor_stub)):
        monkeypatch.setitem(sys.modules, module_name, stub)
| # ── provider.complete_structured ──────────────────────────────────────────────── | |
class TestCompleteStructured:
    """``LiteLLMProvider.complete_structured`` run against the faked instructor/litellm."""

    def test_returns_validated_model_and_captures_usage(self, monkeypatch):
        _install_fakes(monkeypatch, client=_FakeInstructorClient())
        provider = LiteLLMProvider(model="openai/m", api_base="https://x/v1")
        output_cls = build_output_model(["world.observed"])
        result = provider.complete_structured("scene-whisperer", "grow the wood", output_cls)
        assert isinstance(result, output_cls)
        assert result.kind == "world.observed"
        # Token counts are the fixed values carried by the fake usage object.
        assert provider.last_usage["prompt_tokens"] == 11
        assert provider.last_usage["completion_tokens"] == 7
        assert provider.last_usage["total_tokens"] == 18

    def test_captures_cost_from_hidden_params(self, monkeypatch):
        fake_client = _FakeInstructorClient(hidden_cost=0.05)
        _install_fakes(monkeypatch, client=fake_client)
        provider = LiteLLMProvider(model="openai/m", api_base="https://x/v1")
        provider.complete_structured("echo", "drop a pebble", build_output_model(["agent.spoke"]))
        expected_cost = pytest.approx(0.05)
        assert provider.last_usage["cost_usd"] == expected_cost
        assert provider.last_cost == expected_cost

    def test_passes_response_model_and_retries(self, monkeypatch):
        captured: dict = {}
        _install_fakes(monkeypatch, client=_FakeInstructorClient(record=captured))
        provider = LiteLLMProvider(model="openai/m", api_base="https://x/v1", max_retries=4)
        output_cls = build_output_model(["world.observed"])
        provider.complete_structured("seedkeeper", "observe", output_cls)
        assert captured["response_model"] is output_cls
        assert captured["max_retries"] == 4
        assert captured["model"] == "openai/m"
        message_roles = [message["role"] for message in captured["messages"]]
        assert message_roles == ["system", "user"]

    def test_defaults_to_guided_json_schema_mode(self, monkeypatch):
        # Guided decoding, not tool calling: a model with no tool-call parser (e.g. MiniCPM)
        # still validates instead of 400ing. The mode rides on from_litellm, not the call.
        factory_kwargs: dict = {}
        _install_fakes(
            monkeypatch,
            client=_FakeInstructorClient(),
            from_litellm_kw=factory_kwargs,
        )
        provider = LiteLLMProvider(model="openai/m", api_base="https://x/v1")
        provider.complete_structured("echo", "x", build_output_model(["agent.spoke"]))
        assert factory_kwargs["mode"] == "json_schema"

    def test_structured_mode_override_is_honored(self, monkeypatch):
        factory_kwargs: dict = {}
        _install_fakes(
            monkeypatch,
            client=_FakeInstructorClient(),
            from_litellm_kw=factory_kwargs,
        )
        provider = LiteLLMProvider(
            model="openai/m",
            api_base="https://x/v1",
            structured_mode="tools",
        )
        provider.complete_structured("echo", "x", build_output_model(["agent.spoke"]))
        assert factory_kwargs["mode"] == "tools"

    def test_error_zeroes_usage_and_reraises(self, monkeypatch):
        failing_client = _FakeInstructorClient(raise_exc=RuntimeError("boom"))
        _install_fakes(monkeypatch, client=failing_client)
        provider = LiteLLMProvider(model="openai/m", api_base="https://x/v1")
        with pytest.raises(RuntimeError):
            provider.complete_structured("echo", "x", build_output_model(["agent.spoke"]))
        # Checked after the raise: usage must have been zeroed on the failed call.
        assert provider.last_usage["total_tokens"] == 0
        assert provider.last_usage["cost_usd"] == 0.0
| # ── ManifestAgent path selection ──────────────────────────────────────────────── | |
class _Agent(ManifestAgent):
    """Agent fixture for the path-selection tests; its only granted kind is ``world.observed``."""

    manifest = AgentManifest(
        may_emit=["world.observed"],
        model_profile="tiny",
        name="scene-whisperer",
        persona="You grow the wood in one strange sentence.",
    )
@dataclass
class _StructuredProvider:
    """Stand-in live provider exposing complete_structured.

    This must be a dataclass: ``__post_init__`` is invoked only by the
    dataclass-generated ``__init__``. Without the decorator ``last_usage``
    would stay ``None`` and any test subscripting it would fail.

    Attributes are documented inline below.
    """

    # Usage of the most recent call; filled per instance in ``__post_init__``.
    last_usage: dict = None  # type: ignore[assignment]
    # The ``response_model`` most recently passed to ``complete_structured``.
    seen_model: object = None

    def __post_init__(self):
        # Built here rather than as a class-level default so each instance owns its dict.
        self.last_usage = {
            "prompt_tokens": 5,
            "completion_tokens": 3,
            "total_tokens": 8,
            "cost_usd": 0.002,
        }

    def complete_structured(self, role, prompt, response_model):
        """Remember *response_model* and return a fixed instance of it."""
        self.seen_model = response_model
        return response_model(kind="world.observed", text="A booth opens in a root.")

    def complete(self, role, prompt):  # pragma: no cover - must not be reached
        """Fail loudly: the structured path is expected never to call this."""
        raise AssertionError("structured path must not call complete()")
@dataclass
class _FixedRouter:
    """Router stand-in that returns the same provider for every profile.

    This must be a dataclass so that ``_FixedRouter(provider)`` works: the
    class body declares only the ``provider`` annotation, and the decorator
    supplies the ``__init__`` that accepts it.
    """

    # The provider returned from every ``for_profile`` call.
    provider: object

    def for_profile(self, profile):
        """Return the fixed provider; *profile* is ignored."""
        return self.provider
class TestManifestAgentStructuredPath:
    """Which route ``ManifestAgent.act`` takes: structured call, parser, or fallback."""

    def test_uses_structured_path_when_available(self):
        live = _StructuredProvider()
        agent = _Agent(_FixedRouter(live))
        event = agent.act("r", 1, StageProjection(seed="moss"), ())
        assert event.kind == "world.observed"
        assert event.payload["text"] == "A booth opens in a root."
        # The validated path never wraps prose, so no fallback marker is present.
        assert "_raw_fallback" not in event.payload
        # Cost/tokens flowed through from the provider for the Governor.
        assert agent.last_usage["cost_usd"] == pytest.approx(0.002)
        assert agent.last_usage["total_tokens"] == 8
        # The constructed model was constrained to the manifest's may_emit.
        kind_field = live.seen_model.model_fields["kind"]
        assert kind_field.annotation.__args__ == ("world.observed",)

    def test_deterministic_stub_uses_parser_path(self):
        # Offline router yields the stub, which has no complete_structured: the
        # tolerant parser runs and (for prose) marks the fallback.
        agent = _Agent(ModelRouter(offline=True))
        stub = agent.router.for_profile("tiny")
        assert not hasattr(stub, "complete_structured")
        event = agent.act("r", 1, StageProjection(seed="moss"), ())
        assert event.kind == "world.observed"  # coerced to the only allowed kind
        assert event.payload.get("_raw_fallback") is True
        assert agent.last_usage["total_tokens"] > 0

    def test_structured_failure_falls_back_to_parser(self):
        # If the live structured call raises, the agent still produces an event
        # via the parser path rather than dropping the turn.
        class _FailingProvider:
            def __init__(self):
                self.last_usage = {"total_tokens": 0, "cost_usd": 0.0}
                self.calls = []

            def complete_structured(self, role, prompt, response_model):
                raise RuntimeError("validation exhausted")

            def complete(self, role, prompt):
                # Log the prompt so the test can prove this path was taken.
                self.calls.append(prompt)
                self.last_usage = {
                    "prompt_tokens": 4,
                    "completion_tokens": 2,
                    "total_tokens": 6,
                    "cost_usd": 0.0,
                }
                return '{"kind": "world.observed", "text": "fallback line"}'

        failing = _FailingProvider()
        agent = _Agent(_FixedRouter(failing))
        event = agent.act("r", 1, StageProjection(), ())
        assert event.kind == "world.observed"
        assert event.payload["text"] == "fallback line"
        assert failing.calls, "fallback should call complete()"
        assert agent.last_usage["total_tokens"] == 6