"""Hour 0-4 skeleton smoke tests.
Verifies the bare minimum:
1. Models import and validate
2. Environment imports and exposes reset/step/state/close
3. reset() returns a typed Observation
4. step() with a stub tool name doesn't crash and advances state
5. submit_optimization closes a round
6. After 3 rounds the episode is terminal
7. Reserved tool names are rejected
"""
from __future__ import annotations
import sys
from pathlib import Path
# Make polyglot_optima importable for tests
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
import pytest
from models import (
OptimizationAction,
OptimizationObservation,
OptimizationState,
)
from server.environment import PolyglotOptimaEnvironment
def test_models_validate() -> None:
    """Build each model from minimal valid input and check a few fields.

    The action must echo the tool name it was given; the observation and
    state are checked only for the default values asserted below.
    """
    probe_action = OptimizationAction(
        tool_name="get_hardware_profile",
        tool_args={},
        reasoning_trace="just exploring",
    )
    assert probe_action.tool_name == "get_hardware_profile"

    # Only done/reward are supplied; round_number comes from the model default.
    observation = OptimizationObservation(done=False, reward=0.0)
    assert observation.round_number == 1

    # Only episode_id is supplied; everything else comes from model defaults.
    fresh_state = OptimizationState(episode_id="ep1")
    assert fresh_state.step_count == 0
    assert fresh_state.is_terminal is False
    assert "function_tier" in fresh_state.difficulty_axes
def test_models_reject_extras() -> None:
    """Constructing OptimizationAction with an undeclared field must raise.

    Only the action model is exercised here.
    """
    undeclared = {"unknown_field": 42}
    with pytest.raises(Exception):
        OptimizationAction(tool_name="x", **undeclared)
def test_environment_has_gym_api() -> None:
    """The environment exposes the explicit Gym-style API per plan §12 A."""
    env = PolyglotOptimaEnvironment()
    # Attribute presence only; signatures are exercised by the other tests.
    for api_name in ("reset", "step", "state", "close"):
        assert hasattr(env, api_name)
    assert env.SUPPORTS_CONCURRENT_SESSIONS is True
def test_reset_returns_typed_observation() -> None:
    """reset() hands back a populated OptimizationObservation for round 1."""
    env = PolyglotOptimaEnvironment()
    initial = env.reset(seed=42)

    assert isinstance(initial, OptimizationObservation)
    assert initial.done is False
    assert initial.round_number == 1
    # The observation must carry source code and a hardware profile with "simd".
    assert initial.python_code != ""
    assert "simd" in initial.hardware_profile
    # A truthy episode id must be present in the metadata.
    assert initial.metadata["episode_id"]
def test_state_introspection() -> None:
    """state() after a reset yields an OptimizationState at its start values."""
    env = PolyglotOptimaEnvironment()
    env.reset(seed=42)

    snapshot = env.state()
    assert isinstance(snapshot, OptimizationState)
    # No step has been taken yet, so the counters are at their start values.
    assert snapshot.step_count == 0
    assert snapshot.round_number == 1
    assert snapshot.is_terminal is False
def test_step_targets_most_recent_reset_episode() -> None:
    """With two resets on one environment, step() acts on the later episode."""
    env = PolyglotOptimaEnvironment()
    older = env.reset(seed=1)
    newer = env.reset(seed=2)

    probe = OptimizationAction(
        tool_name="profile_python_hotspots",
        tool_args={},
        reasoning_trace="probe",
    )
    outcome = env.step(probe)

    # The stepped episode must be the second reset's, not the first's.
    stepped_id = outcome.observation.metadata["episode_id"]
    assert stepped_id == newer.metadata["episode_id"]
    assert outcome.observation.metadata["episode_id"] != older.metadata["episode_id"]
def test_step_with_stub_tool_does_not_crash() -> None:
    """One non-submit tool call bumps step_count to 1 and leaves done False."""
    env = PolyglotOptimaEnvironment()
    env.reset(seed=42)

    hotspot_probe = OptimizationAction(
        tool_name="profile_python_hotspots",
        tool_args={"code": "def f(): pass"},
        reasoning_trace="checking hotspots",
    )
    outcome = env.step(hotspot_probe)

    assert outcome.done is False
    assert env.state().step_count == 1
def test_round_budget_forces_submit() -> None:
    """With max_calls_per_round=1, a second tool call is turned into a submit.

    The first call must leave the episode running. The second must be
    reported in the observation metadata as a forced ``submit_optimization``,
    and the state must then be on round 2.
    """
    env = PolyglotOptimaEnvironment(max_calls_per_round=1)
    env.reset(seed=42)

    within_budget = env.step(
        OptimizationAction(
            tool_name="profile_python_hotspots",
            tool_args={"code": "def f(): pass"},
            reasoning_trace="probe 1",
        )
    )
    assert within_budget.done is False

    over_budget = env.step(
        OptimizationAction(
            tool_name="analyze_complexity",
            tool_args={"code": "def f(): pass"},
            reasoning_trace="probe 2",
        )
    )
    forced_meta = over_budget.observation.metadata
    assert forced_meta["forced_submit"] is True
    assert forced_meta["tool_called"] == "submit_optimization"
    assert env.state().round_number == 2
def test_reserved_tool_names_rejected() -> None:
    """OpenEnv reserved names (reset/step/state/close) must not be tool names.

    Only ``reset`` and ``close`` are exercised here.
    """
    env = PolyglotOptimaEnvironment()
    env.reset(seed=42)

    for reserved_name in ("reset", "close"):
        # Building the action and stepping both sit inside the raises block.
        with pytest.raises(Exception):
            env.step(
                OptimizationAction(
                    tool_name=reserved_name,
                    tool_args={},
                    reasoning_trace="",
                )
            )
def test_submit_advances_round() -> None:
    """A single submit_optimization moves to round 2 without ending the episode."""
    env = PolyglotOptimaEnvironment()
    env.reset(seed=42)

    submission = OptimizationAction(
        tool_name="submit_optimization",
        tool_args={"cpp_code": "// stub", "reasoning_trace": "round 1"},
        reasoning_trace="round 1",
    )
    outcome = env.step(submission)

    # Two more rounds remain, so the episode is still live.
    assert outcome.done is False
    assert env.state().round_number == 2
def test_three_submits_terminate_episode() -> None:
    """Three consecutive submits end the episode and yield a float reward."""
    env = PolyglotOptimaEnvironment()
    env.reset(seed=42)

    # Only the outcome of the last submit is inspected after the loop.
    for attempt in range(3):
        result = env.step(
            OptimizationAction(
                tool_name="submit_optimization",
                tool_args={"cpp_code": "// stub", "reasoning_trace": f"r{attempt+1}"},
                reasoning_trace=f"round {attempt+1}",
            )
        )

    assert result.done is True
    assert env.state().is_terminal is True
    # Final reward in stub mode is 0.0; real values in Hour 10–16
    assert isinstance(result.reward, float)
def test_close_clears_sessions() -> None:
    """close() leaves the environment's ``_sessions`` container empty."""
    env = PolyglotOptimaEnvironment()

    # A reset must leave ``_sessions`` non-empty, or the check below is vacuous.
    env.reset(seed=1)
    assert env._sessions

    env.close()
    assert not env._sessions