Nomearod Claude Opus 4.6 (1M context) committed on
Commit
e9173a5
·
1 Parent(s): ef5d585

fix: address Day 1 audit findings

Browse files

1. Config loader: cwd-first with package-relative fallback (finding 1)
2. OpenAI provider tests: factory, format_tools, mocked complete()
with respx — exercises the real provider path (finding 2)
3. Makefile: bare python → python3 for portability (finding 3)
4. OpenAI SDK: pass api_key explicitly to avoid constructor error
in CI/test environments without OPENAI_API_KEY set

23 tests, all deterministic, lint + mypy clean.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Makefile CHANGED
@@ -15,16 +15,16 @@ serve:
15
  uvicorn agent_bench.serving.app:create_app --factory --reload --port 8000
16
 
17
  ingest:
18
- python scripts/ingest.py --config configs/tasks/tech_docs.yaml
19
 
20
  evaluate-fast:
21
- python scripts/evaluate.py --config configs/default.yaml --mode deterministic
22
 
23
  evaluate-full:
24
- python scripts/evaluate.py --config configs/default.yaml --mode full
25
 
26
  benchmark:
27
- python scripts/benchmark.py --output docs/benchmark_report.md
28
 
29
  docker:
30
  docker-compose -f docker/docker-compose.yaml up --build
 
15
  uvicorn agent_bench.serving.app:create_app --factory --reload --port 8000
16
 
17
  ingest:
18
+ python3 scripts/ingest.py --config configs/tasks/tech_docs.yaml
19
 
20
  evaluate-fast:
21
+ python3 scripts/evaluate.py --config configs/default.yaml --mode deterministic
22
 
23
  evaluate-full:
24
+ python3 scripts/evaluate.py --config configs/default.yaml --mode full
25
 
26
  benchmark:
27
+ python3 scripts/benchmark.py --output docs/benchmark_report.md
28
 
29
  docker:
30
  docker-compose -f docker/docker-compose.yaml up --build
agent_bench/core/config.py CHANGED
@@ -93,8 +93,15 @@ class TaskFileConfig(BaseModel):
93
 
94
 
95
  def _resolve_config_dir() -> Path:
96
- """Resolve configs directory relative to cwd."""
97
- return Path.cwd() / "configs"
 
 
 
 
 
 
 
98
 
99
 
100
  def load_config(path: Path | None = None) -> AppConfig:
 
93
 
94
 
95
  def _resolve_config_dir() -> Path:
96
+ """Resolve configs directory: cwd first, then package-relative fallback."""
97
+ cwd_configs = Path.cwd() / "configs"
98
+ if cwd_configs.is_dir():
99
+ return cwd_configs
100
+ # Fallback: relative to package location (works for installed packages)
101
+ pkg_configs = Path(__file__).resolve().parent.parent.parent / "configs"
102
+ if pkg_configs.is_dir():
103
+ return pkg_configs
104
+ return cwd_configs # Let the caller get a clear FileNotFoundError
105
 
106
 
107
  def load_config(path: Path | None = None) -> AppConfig:
agent_bench/core/provider.py CHANGED
@@ -152,8 +152,11 @@ class OpenAIProvider(LLMProvider):
152
  except ImportError as e:
153
  raise ImportError("openai package required: pip install openai") from e
154
 
 
 
155
  self.config = config or load_config()
156
- self.client = AsyncOpenAI()
 
157
  self.model = "gpt-4o-mini"
158
  model_pricing = self.config.provider.models.get(self.model)
159
  self._input_cost = model_pricing.input_cost_per_mtok if model_pricing else 0.15
 
152
  except ImportError as e:
153
  raise ImportError("openai package required: pip install openai") from e
154
 
155
+ import os
156
+
157
  self.config = config or load_config()
158
+ api_key = os.environ.get("OPENAI_API_KEY", "")
159
+ self.client = AsyncOpenAI(api_key=api_key)
160
  self.model = "gpt-4o-mini"
161
  model_pricing = self.config.provider.models.get(self.model)
162
  self._input_cost = model_pricing.input_cost_per_mtok if model_pricing else 0.15
tests/test_provider.py CHANGED
@@ -222,6 +222,150 @@ class TestOpenAIFormat:
222
  assert formatted[3]["tool_call_id"] == "call_1"
223
 
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  # --- Anthropic stub ---
226
 
227
 
 
222
  assert formatted[3]["tool_call_id"] == "call_1"
223
 
224
 
225
+ # --- OpenAI provider (mocked HTTP) ---
226
+
227
+
228
+ class TestOpenAIProvider:
229
+ def test_factory_creates_openai_provider(self, monkeypatch):
230
+ """Factory returns OpenAIProvider for 'openai' config."""
231
+ monkeypatch.setenv("OPENAI_API_KEY", "test-key-fake")
232
+ from agent_bench.core.provider import OpenAIProvider
233
+
234
+ config = AppConfig(provider=ProviderConfig(default="openai"))
235
+ provider = create_provider(config)
236
+ assert isinstance(provider, OpenAIProvider)
237
+
238
+ def test_format_tools_via_instance(self, monkeypatch):
239
+ """OpenAIProvider.format_tools delegates to format_tools_openai correctly."""
240
+ monkeypatch.setenv("OPENAI_API_KEY", "test-key-fake")
241
+ from agent_bench.core.provider import OpenAIProvider
242
+
243
+ config = AppConfig(provider=ProviderConfig(default="openai"))
244
+ provider = OpenAIProvider(config)
245
+ tools = [
246
+ ToolDefinition(
247
+ name="search_documents",
248
+ description="Search docs",
249
+ parameters={"type": "object", "properties": {"query": {"type": "string"}}},
250
+ )
251
+ ]
252
+ formatted = provider.format_tools(tools)
253
+ assert formatted[0]["type"] == "function"
254
+ assert formatted[0]["function"]["name"] == "search_documents"
255
+
256
+ @pytest.mark.asyncio
257
+ async def test_complete_with_mocked_response(self, monkeypatch):
258
+ """OpenAI complete() parses a mocked API response correctly."""
259
+ monkeypatch.setenv("OPENAI_API_KEY", "test-key-fake")
260
+
261
+ import httpx
262
+ import respx
263
+
264
+ from agent_bench.core.provider import OpenAIProvider
265
+
266
+ config = AppConfig(provider=ProviderConfig(default="openai"))
267
+ provider = OpenAIProvider(config)
268
+
269
+ mock_response = {
270
+ "id": "chatcmpl-test",
271
+ "object": "chat.completion",
272
+ "created": 1234567890,
273
+ "model": "gpt-4o-mini",
274
+ "choices": [
275
+ {
276
+ "index": 0,
277
+ "message": {
278
+ "role": "assistant",
279
+ "content": "FastAPI uses curly braces. [source: path_params.md]",
280
+ "tool_calls": None,
281
+ },
282
+ "finish_reason": "stop",
283
+ }
284
+ ],
285
+ "usage": {"prompt_tokens": 100, "completion_tokens": 30, "total_tokens": 130},
286
+ }
287
+
288
+ with respx.mock:
289
+ respx.post("https://api.openai.com/v1/chat/completions").mock(
290
+ return_value=httpx.Response(200, json=mock_response)
291
+ )
292
+ response = await provider.complete(
293
+ [Message(role=Role.USER, content="How do path params work?")]
294
+ )
295
+
296
+ assert response.content == "FastAPI uses curly braces. [source: path_params.md]"
297
+ assert response.tool_calls == []
298
+ assert response.provider == "openai"
299
+ assert response.usage.input_tokens == 100
300
+ assert response.usage.output_tokens == 30
301
+ assert response.usage.estimated_cost_usd > 0
302
+ assert response.latency_ms > 0
303
+
304
+ @pytest.mark.asyncio
305
+ async def test_complete_parses_tool_calls(self, monkeypatch):
306
+ """OpenAI complete() correctly parses tool_calls from response."""
307
+ monkeypatch.setenv("OPENAI_API_KEY", "test-key-fake")
308
+ import json
309
+
310
+ import httpx
311
+ import respx
312
+
313
+ from agent_bench.core.provider import OpenAIProvider
314
+
315
+ config = AppConfig(provider=ProviderConfig(default="openai"))
316
+ provider = OpenAIProvider(config)
317
+
318
+ mock_response = {
319
+ "id": "chatcmpl-test2",
320
+ "object": "chat.completion",
321
+ "created": 1234567890,
322
+ "model": "gpt-4o-mini",
323
+ "choices": [
324
+ {
325
+ "index": 0,
326
+ "message": {
327
+ "role": "assistant",
328
+ "content": None,
329
+ "tool_calls": [
330
+ {
331
+ "id": "call_abc123",
332
+ "type": "function",
333
+ "function": {
334
+ "name": "search_documents",
335
+ "arguments": json.dumps({"query": "path parameters"}),
336
+ },
337
+ }
338
+ ],
339
+ },
340
+ "finish_reason": "tool_calls",
341
+ }
342
+ ],
343
+ "usage": {"prompt_tokens": 80, "completion_tokens": 20, "total_tokens": 100},
344
+ }
345
+
346
+ tools = [
347
+ ToolDefinition(
348
+ name="search_documents",
349
+ description="Search docs",
350
+ parameters={"type": "object", "properties": {"query": {"type": "string"}}},
351
+ )
352
+ ]
353
+
354
+ with respx.mock:
355
+ respx.post("https://api.openai.com/v1/chat/completions").mock(
356
+ return_value=httpx.Response(200, json=mock_response)
357
+ )
358
+ response = await provider.complete(
359
+ [Message(role=Role.USER, content="search for path params")],
360
+ tools=tools,
361
+ )
362
+
363
+ assert len(response.tool_calls) == 1
364
+ assert response.tool_calls[0].id == "call_abc123"
365
+ assert response.tool_calls[0].name == "search_documents"
366
+ assert response.tool_calls[0].arguments == {"query": "path parameters"}
367
+
368
+
369
  # --- Anthropic stub ---
370
 
371