Spaces:

Swastikr
/

polyglot_optima

Sleeping

App Files Files Community

polyglot_optima / tests /test_skeleton.py

Swastikr

Upload folder using huggingface_hub

2bf863e verified about 1 month ago

raw

history blame contribute delete

5.97 kB

	"""Hour 0-4 skeleton smoke tests.

	Verifies the bare minimum:
	1. Models import and validate
	2. Environment imports and exposes reset/step/state/close
	3. reset() returns a typed Observation
	4. step() with a stub tool name doesn't crash and advances state
	5. submit_optimization closes a round
	6. After 3 rounds the episode is terminal
	7. Reserved tool names are rejected
	"""

	from __future__ import annotations

	import sys
	from pathlib import Path

	# Put the project root (the directory above tests/) on sys.path so the top-level `models` and `server` modules import.
	sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

	import pytest

	from models import (
	OptimizationAction,
	OptimizationObservation,
	OptimizationState,
	)
	from server.environment import PolyglotOptimaEnvironment


	def test_models_validate():
	    """Build each of the three models from valid input and pin the defaults they expose."""
	    action_fields = {
	        "tool_name": "get_hardware_profile",
	        "tool_args": {},
	        "reasoning_trace": "<think>just exploring</think>",
	    }
	    built_action = OptimizationAction(**action_fields)
	    assert built_action.tool_name == "get_hardware_profile"

	    # round_number is not supplied, so this checks the model's default.
	    built_observation = OptimizationObservation(done=False, reward=0.0)
	    assert built_observation.round_number == 1

	    # Only episode_id is supplied; everything asserted below is a default.
	    built_state = OptimizationState(episode_id="ep1")
	    assert built_state.step_count == 0
	    assert built_state.is_terminal is False
	    assert "function_tier" in built_state.difficulty_axes


	def test_models_reject_extras():
	    """extra='forbid' on all three models.

	    Each model is built from otherwise valid keyword arguments plus one
	    undeclared field, so the undeclared field is the only thing that can
	    make construction fail.
	    """
	    valid_kwargs_by_model = (
	        (
	            OptimizationAction,
	            {
	                "tool_name": "get_hardware_profile",
	                "tool_args": {},
	                "reasoning_trace": "<think>just exploring</think>",
	            },
	        ),
	        (OptimizationObservation, {"done": False, "reward": 0.0}),
	        (OptimizationState, {"episode_id": "ep1"}),
	    )
	    for model_cls, valid_kwargs in valid_kwargs_by_model:
	        # Exception is kept broad on purpose: the concrete validation error
	        # type is not imported by this file.
	        with pytest.raises(Exception):
	            model_cls(**valid_kwargs, unknown_field=42)


	def test_environment_has_gym_api():
	    """The environment carries reset/step/state/close (plan §12 A) and the concurrency flag."""
	    environment = PolyglotOptimaEnvironment()
	    for api_name in ("reset", "step", "state", "close"):
	        assert hasattr(environment, api_name)
	    assert environment.SUPPORTS_CONCURRENT_SESSIONS is True


	def test_reset_returns_typed_observation():
	    """A seeded reset() hands back a populated, first-round OptimizationObservation."""
	    environment = PolyglotOptimaEnvironment()
	    initial = environment.reset(seed=42)

	    # Type and episode position.
	    assert isinstance(initial, OptimizationObservation)
	    assert initial.done is False
	    assert initial.round_number == 1

	    # Payload the observation is expected to carry.
	    assert initial.python_code != ""
	    assert "simd" in initial.hardware_profile
	    assert initial.metadata["episode_id"]


	def test_state_introspection():
	    """Right after reset(), state() reports an untouched, non-terminal first round."""
	    environment = PolyglotOptimaEnvironment()
	    environment.reset(seed=42)
	    snapshot = environment.state()
	    assert isinstance(snapshot, OptimizationState)
	    assert snapshot.step_count == 0
	    assert snapshot.round_number == 1
	    assert snapshot.is_terminal is False


	def test_step_targets_most_recent_reset_episode():
	    """With two resets on one environment, step() acts on the second episode, not the first."""
	    environment = PolyglotOptimaEnvironment()
	    stale_obs = environment.reset(seed=1)
	    live_obs = environment.reset(seed=2)

	    probe = OptimizationAction(
	        tool_name="profile_python_hotspots",
	        tool_args={},
	        reasoning_trace="probe",
	    )
	    outcome = environment.step(probe)

	    stepped_episode = outcome.observation.metadata["episode_id"]
	    assert stepped_episode == live_obs.metadata["episode_id"]
	    assert stepped_episode != stale_obs.metadata["episode_id"]


	def test_step_with_stub_tool_does_not_crash():
	    """One non-submit tool call bumps step_count to 1 and leaves the episode open."""
	    environment = PolyglotOptimaEnvironment()
	    environment.reset(seed=42)

	    hotspot_call = OptimizationAction(
	        tool_name="profile_python_hotspots",
	        tool_args={"code": "def f(): pass"},
	        reasoning_trace="<think>checking hotspots</think>",
	    )
	    outcome = environment.step(hotspot_call)

	    assert outcome.done is False
	    assert environment.state().step_count == 1


	def test_round_budget_forces_submit():
	    """With max_calls_per_round=1, a second non-submit call is reported as a forced submit.

	    The first call must leave the episode open. The second must come back
	    flagged as forced, recorded as submit_optimization, and move the
	    environment to round 2.
	    """
	    environment = PolyglotOptimaEnvironment(max_calls_per_round=1)
	    environment.reset(seed=42)

	    within_budget = environment.step(OptimizationAction(
	        tool_name="profile_python_hotspots",
	        tool_args={"code": "def f(): pass"},
	        reasoning_trace="probe 1",
	    ))
	    assert within_budget.done is False

	    over_budget = environment.step(OptimizationAction(
	        tool_name="analyze_complexity",
	        tool_args={"code": "def f(): pass"},
	        reasoning_trace="probe 2",
	    ))
	    forced_meta = over_budget.observation.metadata
	    assert forced_meta["forced_submit"] is True
	    assert forced_meta["tool_called"] == "submit_optimization"
	    assert environment.state().round_number == 2


	def test_reserved_tool_names_rejected():
	    """OpenEnv reserved names (reset/step/state/close) must not be used as tool names.

	    All four reserved names are checked, one after another, against the
	    same freshly reset environment.
	    """
	    env = PolyglotOptimaEnvironment()
	    env.reset(seed=42)
	    for reserved_name in ("reset", "step", "state", "close"):
	        # The action is constructed inside the context manager so the
	        # rejection is caught whether it is raised while building the
	        # action or while stepping the environment.
	        with pytest.raises(Exception):
	            env.step(OptimizationAction(
	                tool_name=reserved_name,
	                tool_args={},
	                reasoning_trace="",
	            ))


	def test_submit_advances_round():
	    """A single submit_optimization moves the environment to round 2 without ending the episode."""
	    environment = PolyglotOptimaEnvironment()
	    environment.reset(seed=42)

	    first_submit = OptimizationAction(
	        tool_name="submit_optimization",
	        tool_args={"cpp_code": "// stub", "reasoning_trace": "<think>round 1</think>"},
	        reasoning_trace="<think>round 1</think>",
	    )
	    outcome = environment.step(first_submit)

	    # Two more rounds remain after the first submit, so the episode stays open.
	    assert outcome.done is False
	    assert environment.state().round_number == 2


	def test_three_submits_terminate_episode():
	    """Three consecutive submits end the episode, and the last result carries a float reward."""
	    environment = PolyglotOptimaEnvironment()
	    environment.reset(seed=42)

	    for round_no in (1, 2, 3):
	        submit = OptimizationAction(
	            tool_name="submit_optimization",
	            tool_args={"cpp_code": "// stub", "reasoning_trace": f"r{round_no}"},
	            reasoning_trace=f"<think>round {round_no}</think>",
	        )
	        last_outcome = environment.step(submit)

	    assert last_outcome.done is True
	    assert environment.state().is_terminal is True
	    # Only the type is pinned: the stub-mode reward is 0.0, with real
	    # values planned for Hour 10–16.
	    assert isinstance(last_outcome.reward, float)


	def test_close_clears_sessions():
	    """close() leaves the environment's private _sessions attribute falsy."""
	    environment = PolyglotOptimaEnvironment()
	    environment.reset(seed=1)
	    # Guard against a vacuous pass: _sessions must be truthy after reset().
	    assert environment._sessions
	    environment.close()
	    assert not environment._sessions