Spaces:
Sleeping
Sleeping
| """Hour 0-4 skeleton smoke tests. | |
| Verifies the bare minimum: | |
| 1. Models import and validate | |
| 2. Environment imports and exposes reset/step/state/close | |
| 3. reset() returns a typed Observation | |
| 4. step() with a stub tool name doesn't crash and advances state | |
| 5. submit_optimization closes a round | |
| 6. After 3 rounds the episode is terminal | |
| 7. Reserved tool names are rejected | |
| """ | |
| from __future__ import annotations | |
| import sys | |
| from pathlib import Path | |
| # Make polyglot_optima importable for tests | |
| sys.path.insert(0, str(Path(__file__).resolve().parents[1])) | |
| import pytest | |
| from models import ( | |
| OptimizationAction, | |
| OptimizationObservation, | |
| OptimizationState, | |
| ) | |
| from server.environment import PolyglotOptimaEnvironment | |
def test_models_validate():
    """Each of the three models builds from valid input and exposes its defaults."""
    probe = OptimizationAction(
        reasoning_trace="<think>just exploring</think>",
        tool_args={},
        tool_name="get_hardware_profile",
    )
    assert probe.tool_name == "get_hardware_profile"

    # Only the required arguments are supplied; round_number comes from the default.
    observation = OptimizationObservation(done=False, reward=0.0)
    assert observation.round_number == 1

    # A state built from just an episode id starts at step zero and non-terminal.
    fresh_state = OptimizationState(episode_id="ep1")
    assert fresh_state.step_count == 0
    assert fresh_state.is_terminal is False
    assert "function_tier" in fresh_state.difficulty_axes
def test_models_reject_extras():
    """Every one of the three models refuses a keyword it does not declare.

    Each constructor receives arguments that are otherwise accepted elsewhere
    in this module, plus a single unknown field, so the only thing that can
    trigger the error is the extra keyword itself. Each call sits in its own
    ``pytest.raises`` block: statements after the first raising call inside a
    shared block would never execute.
    """
    # NOTE(review): the concrete exception class is defined by the models
    # module, which is not visible here, so the broad ``Exception`` match of
    # the original test is kept.
    with pytest.raises(Exception):
        OptimizationAction(
            tool_name="get_hardware_profile",
            tool_args={},
            reasoning_trace="<think>just exploring</think>",
            unknown_field=42,
        )
    with pytest.raises(Exception):
        OptimizationObservation(done=False, reward=0.0, unknown_field=42)
    with pytest.raises(Exception):
        OptimizationState(episode_id="ep1", unknown_field=42)
def test_environment_has_gym_api():
    """Check the Gym-style surface (plan §12 A): reset, step, state and close exist.

    Also confirms the class-level SUPPORTS_CONCURRENT_SESSIONS flag is True.
    """
    environment = PolyglotOptimaEnvironment()
    for member in ("reset", "step", "state", "close"):
        assert hasattr(environment, member)
    assert environment.SUPPORTS_CONCURRENT_SESSIONS is True
def test_reset_returns_typed_observation():
    """A seeded reset() hands back a populated, first-round OptimizationObservation."""
    environment = PolyglotOptimaEnvironment()
    initial = environment.reset(seed=42)

    # Type and episode-progress fields.
    assert isinstance(initial, OptimizationObservation)
    assert initial.done is False
    assert initial.round_number == 1

    # Payload fields the observation is expected to carry after a reset.
    assert initial.python_code != ""
    assert "simd" in initial.hardware_profile
    assert initial.metadata["episode_id"]
def test_state_introspection():
    """After a reset, state() reports a non-terminal OptimizationState at step 0, round 1."""
    environment = PolyglotOptimaEnvironment()
    environment.reset(seed=42)

    snapshot = environment.state()
    assert isinstance(snapshot, OptimizationState)
    assert snapshot.step_count == 0
    assert snapshot.round_number == 1
    assert snapshot.is_terminal is False
def test_step_targets_most_recent_reset_episode():
    """With two resets on one environment, step() acts on the episode from the second."""
    environment = PolyglotOptimaEnvironment()
    stale_obs = environment.reset(seed=1)
    live_obs = environment.reset(seed=2)

    probe = OptimizationAction(
        tool_name="profile_python_hotspots",
        tool_args={},
        reasoning_trace="probe",
    )
    outcome = environment.step(probe)

    # The stepped observation must carry the second episode's id, not the first's.
    stepped_id = outcome.observation.metadata["episode_id"]
    assert stepped_id == live_obs.metadata["episode_id"]
    assert stepped_id != stale_obs.metadata["episode_id"]
def test_step_with_stub_tool_does_not_crash():
    """One non-submit tool call leaves the episode open and counts as a single step."""
    environment = PolyglotOptimaEnvironment()
    environment.reset(seed=42)

    hotspot_probe = OptimizationAction(
        tool_name="profile_python_hotspots",
        tool_args={"code": "def f(): pass"},
        reasoning_trace="<think>checking hotspots</think>",
    )
    outcome = environment.step(hotspot_probe)

    assert outcome.done is False
    assert environment.state().step_count == 1
def test_round_budget_forces_submit():
    """With max_calls_per_round=1, a second probe in the round is flagged as a forced submit.

    The first probe must leave the episode open; the second must be reported
    in the observation metadata as ``submit_optimization`` with
    ``forced_submit`` set, and the environment must then be on round 2.
    """
    environment = PolyglotOptimaEnvironment(max_calls_per_round=1)
    environment.reset(seed=42)

    # First call fits inside the one-call budget.
    within_budget = environment.step(
        OptimizationAction(
            tool_name="profile_python_hotspots",
            tool_args={"code": "def f(): pass"},
            reasoning_trace="probe 1",
        )
    )
    assert within_budget.done is False

    # Second call exceeds the budget for the round.
    over_budget = environment.step(
        OptimizationAction(
            tool_name="analyze_complexity",
            tool_args={"code": "def f(): pass"},
            reasoning_trace="probe 2",
        )
    )
    forced_meta = over_budget.observation.metadata
    assert forced_meta["forced_submit"] is True
    assert forced_meta["tool_called"] == "submit_optimization"
    assert environment.state().round_number == 2
def test_reserved_tool_names_rejected():
    """OpenEnv reserved names (reset/step/state/close) must not be used as tool names.

    All four reserved names are exercised. Each attempt has its own
    ``pytest.raises`` block so that one name being rejected cannot mask
    another being accepted.
    """
    env = PolyglotOptimaEnvironment()
    env.reset(seed=42)
    for reserved_name in ("reset", "step", "state", "close"):
        # The action is built inside the block as well: the rejection may come
        # from constructing the action or from env.step(); this file does not
        # show which.
        with pytest.raises(Exception):
            env.step(
                OptimizationAction(
                    tool_name=reserved_name,
                    tool_args={},
                    reasoning_trace="",
                )
            )
def test_submit_advances_round():
    """A single submit_optimization call ends round 1 without ending the episode."""
    environment = PolyglotOptimaEnvironment()
    environment.reset(seed=42)

    first_submit = OptimizationAction(
        tool_name="submit_optimization",
        tool_args={"cpp_code": "// stub", "reasoning_trace": "<think>round 1</think>"},
        reasoning_trace="<think>round 1</think>",
    )
    outcome = environment.step(first_submit)

    # Two more rounds remain, so the episode is still open and now on round 2.
    assert outcome.done is False
    assert environment.state().round_number == 2
def test_three_submits_terminate_episode():
    """Three consecutive submits end the episode and yield a float reward."""
    environment = PolyglotOptimaEnvironment()
    environment.reset(seed=42)

    r = 0
    while r < 3:
        submission = OptimizationAction(
            tool_name="submit_optimization",
            tool_args={"cpp_code": "// stub", "reasoning_trace": f"r{r+1}"},
            reasoning_trace=f"<think>round {r+1}</think>",
        )
        result = environment.step(submission)
        r += 1

    # Only the outcome of the third submit is inspected.
    assert result.done is True
    assert environment.state().is_terminal is True
    # Final reward in stub mode is 0.0; real values in Hour 10–16
    assert isinstance(result.reward, float)
def test_close_clears_sessions():
    """The private ``_sessions`` container is non-empty after reset() and empty after close()."""
    environment = PolyglotOptimaEnvironment()
    environment.reset(seed=1)

    # Precondition: there is something for close() to clear.
    assert environment._sessions

    environment.close()
    assert not environment._sessions