"""Hour 0-4 skeleton smoke tests. Verifies the bare minimum: 1. Models import and validate 2. Environment imports and exposes reset/step/state/close 3. reset() returns a typed Observation 4. step() with a stub tool name doesn't crash and advances state 5. submit_optimization closes a round 6. After 3 rounds the episode is terminal 7. Reserved tool names are rejected """ from __future__ import annotations import sys from pathlib import Path # Make polyglot_optima importable for tests sys.path.insert(0, str(Path(__file__).resolve().parents[1])) import pytest from models import ( OptimizationAction, OptimizationObservation, OptimizationState, ) from server.environment import PolyglotOptimaEnvironment def test_models_validate(): """Pydantic models accept valid input and reject extras.""" action = OptimizationAction( tool_name="get_hardware_profile", tool_args={}, reasoning_trace="just exploring", ) assert action.tool_name == "get_hardware_profile" obs = OptimizationObservation(done=False, reward=0.0) assert obs.round_number == 1 state = OptimizationState(episode_id="ep1") assert state.step_count == 0 assert state.is_terminal is False assert "function_tier" in state.difficulty_axes def test_models_reject_extras(): """extra='forbid' on all three models.""" with pytest.raises(Exception): OptimizationAction(tool_name="x", unknown_field=42) def test_environment_has_gym_api(): """Environment exposes the explicit Gym-style API per plan §12 A.""" env = PolyglotOptimaEnvironment() assert hasattr(env, "reset") assert hasattr(env, "step") assert hasattr(env, "state") assert hasattr(env, "close") assert env.SUPPORTS_CONCURRENT_SESSIONS is True def test_reset_returns_typed_observation(): """reset() returns an OptimizationObservation with the expected shape.""" env = PolyglotOptimaEnvironment() obs = env.reset(seed=42) assert isinstance(obs, OptimizationObservation) assert obs.done is False assert obs.round_number == 1 assert obs.python_code != "" assert "simd" in obs.hardware_profile assert obs.metadata["episode_id"] def test_state_introspection(): """state() returns the in-memory OptimizationState.""" env = PolyglotOptimaEnvironment() env.reset(seed=42) s = env.state() assert isinstance(s, OptimizationState) assert s.step_count == 0 assert s.round_number == 1 assert s.is_terminal is False def test_step_targets_most_recent_reset_episode(): """After multiple resets, step() should target the latest active episode.""" env = PolyglotOptimaEnvironment() first = env.reset(seed=1) second = env.reset(seed=2) result = env.step(OptimizationAction( tool_name="profile_python_hotspots", tool_args={}, reasoning_trace="probe", )) assert result.observation.metadata["episode_id"] == second.metadata["episode_id"] assert result.observation.metadata["episode_id"] != first.metadata["episode_id"] def test_step_with_stub_tool_does_not_crash(): """A non-submit tool call advances step_count, doesn't terminate the episode.""" env = PolyglotOptimaEnvironment() env.reset(seed=42) result = env.step(OptimizationAction( tool_name="profile_python_hotspots", tool_args={"code": "def f(): pass"}, reasoning_trace="checking hotspots", )) assert result.done is False assert env.state().step_count == 1 def test_round_budget_forces_submit(): env = PolyglotOptimaEnvironment(max_calls_per_round=1) env.reset(seed=42) first = env.step(OptimizationAction( tool_name="profile_python_hotspots", tool_args={"code": "def f(): pass"}, reasoning_trace="probe 1", )) assert first.done is False second = env.step(OptimizationAction( tool_name="analyze_complexity", tool_args={"code": "def f(): pass"}, reasoning_trace="probe 2", )) assert second.observation.metadata["forced_submit"] is True assert second.observation.metadata["tool_called"] == "submit_optimization" assert env.state().round_number == 2 def test_reserved_tool_names_rejected(): """OpenEnv reserved names (reset/step/state/close) must not be used as tool names.""" env = PolyglotOptimaEnvironment() env.reset(seed=42) with pytest.raises(Exception): env.step(OptimizationAction(tool_name="reset", tool_args={}, reasoning_trace="")) with pytest.raises(Exception): env.step(OptimizationAction(tool_name="close", tool_args={}, reasoning_trace="")) def test_submit_advances_round(): """submit_optimization closes the current round and bumps round_number.""" env = PolyglotOptimaEnvironment() env.reset(seed=42) result = env.step(OptimizationAction( tool_name="submit_optimization", tool_args={"cpp_code": "// stub", "reasoning_trace": "round 1"}, reasoning_trace="round 1", )) assert result.done is False # 2 more rounds remain assert env.state().round_number == 2 def test_three_submits_terminate_episode(): """3 submits → episode terminal, final reward is computed.""" env = PolyglotOptimaEnvironment() env.reset(seed=42) for r in range(3): result = env.step(OptimizationAction( tool_name="submit_optimization", tool_args={"cpp_code": "// stub", "reasoning_trace": f"r{r+1}"}, reasoning_trace=f"round {r+1}", )) assert result.done is True assert env.state().is_terminal is True # Final reward in stub mode is 0.0; real values in Hour 10–16 assert isinstance(result.reward, float) def test_close_clears_sessions(): env = PolyglotOptimaEnvironment() env.reset(seed=1) assert env._sessions env.close() assert not env._sessions