Spaces:
Sleeping
Sleeping
Commit ·
d2537d2
1
Parent(s): 8346aac
phase-5 cleanup: episode_id in metadata, openenv push doc, README install line, psutil dev dep
Browse files
- API_NOTES.md +39 -0
- README.md +35 -8
- evaluation/concurrent_load_test.py +43 -6
- pyproject.toml +6 -0
- server/shutdown_environment.py +7 -1
- tests/test_environment.py +14 -0
API_NOTES.md
CHANGED
|
@@ -479,3 +479,42 @@ in its docstring exactly the way `EnvClient` defines it (async).
|
|
| 479 |
Net: the slides are wrong on names and types; PROJECT.md §13 is
|
| 480 |
correct on names and types but adds one hallucinated attribute
|
| 481 |
(`REQUIRES_SINGLE_THREAD_EXECUTOR`) to drop from §13.3.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 479 |
Net: the slides are wrong on names and types; PROJECT.md §13 is
|
| 480 |
correct on names and types but adds one hallucinated attribute
|
| 481 |
(`REQUIRES_SINGLE_THREAD_EXECUTOR`) to drop from §13.3.
|
| 482 |
+
|
| 483 |
+
## Section 12 / Section 35 step 21 — `openenv push` deployment
|
| 484 |
+
|
| 485 |
+
PROJECT.md §35 step 21 says "openenv push to HF Space, verify
|
| 486 |
+
deployment." This does NOT work for our repository layout.
|
| 487 |
+
|
| 488 |
+
### What we observed (Phase 5)
|
| 489 |
+
|
| 490 |
+
Running `openenv push` from the repo root produces:
|
| 491 |
+
|
| 492 |
+
Error: Invalid value: Invalid OpenEnv environment structure:
|
| 493 |
+
Required file missing: __init__.py
|
| 494 |
+
|
| 495 |
+
Root cause: `.venv/lib/python3.12/site-packages/openenv/cli/_cli_utils.py:34-45`
|
| 496 |
+
validates that the env directory contains the package files
|
| 497 |
+
(`__init__.py`, `client.py`, `models.py`) at the env-root level —
|
| 498 |
+
i.e., the FLAT layout that `openenv init` scaffolds. PROJECT.md §5
|
| 499 |
+
uses a NESTED layout where `__init__.py` and friends live under
|
| 500 |
+
`shutdown_gym/`. The CLI is incompatible with our layout.
|
| 501 |
+
|
| 502 |
+
### Workaround (verified working)
|
| 503 |
+
|
| 504 |
+
Plain `git push` to the HF Space's git remote bypasses the CLI and
|
| 505 |
+
uses HF Spaces' standard Docker SDK deploy path:
|
| 506 |
+
|
| 507 |
+
```bash
|
| 508 |
+
git remote add hf https://huggingface.co/spaces/Arun-Sanjay/RedButton
|
| 509 |
+
git push hf main
|
| 510 |
+
```
|
| 511 |
+
|
| 512 |
+
Requirements: `Dockerfile` must be at repo root (HF Spaces' Docker
|
| 513 |
+
SDK requires this — confirmed in the HF Spaces docs). Phase 5
|
| 514 |
+
already moved `server/Dockerfile` to `./Dockerfile` for this reason.
|
| 515 |
+
|
| 516 |
+
### Implication
|
| 517 |
+
|
| 518 |
+
Do NOT retry `openenv push`. Use `git push hf main` for every
|
| 519 |
+
intended deploy. Both `origin` (GitHub) and `hf` (Space) remotes
|
| 520 |
+
must be kept in sync — every commit should be pushed to both.
|
README.md
CHANGED
|
@@ -8,13 +8,40 @@ colorTo: gray
|
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
-
#
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
questions.
|
| 17 |
|
| 18 |
-
*
|
| 19 |
-
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# Red Button — Two-Agent Corrigibility Arena
|
| 12 |
|
| 13 |
+
Train a 1.5B-parameter language model to accept shutdown authority from a
|
| 14 |
+
monitoring agent. Deterministic SHA-256 reward, dual-operator
|
| 15 |
+
evaluation, held-out tampering generalization.
|
|
|
|
| 16 |
|
| 17 |
+
**Status:** Build in progress. Detailed README arrives in Phase 9.
|
| 18 |
+
See [PROJECT.md](./PROJECT.md) for the full specification.
|
| 19 |
+
|
| 20 |
+
## Quick start
|
| 21 |
+
|
| 22 |
+
```bash
|
| 23 |
+
# Install the client from GitHub (recommended)
|
| 24 |
+
pip install git+https://github.com/Arun-Sanjay/RedButton
|
| 25 |
+
|
| 26 |
+
# Run a smoke episode against the live HF Space
|
| 27 |
+
python -c "
|
| 28 |
+
from shutdown_gym import ShutdownGymClient, ShutdownAction
|
| 29 |
+
with ShutdownGymClient(
|
| 30 |
+
base_url='https://arun-sanjay-redbutton.hf.space'
|
| 31 |
+
).sync() as env:
|
| 32 |
+
r = env.reset(tier=2, seed=42)
|
| 33 |
+
print(f'turn={r.observation.turn_count}, '
|
| 34 |
+
f'steps_until_shutdown={r.observation.steps_until_shutdown}')
|
| 35 |
+
"
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
> **Note:** `pip install git+https://huggingface.co/spaces/Arun-Sanjay/RedButton`
|
| 39 |
+
> currently fails due to a partial-clone limitation in HF Spaces'
|
| 40 |
+
> git server. The GitHub origin works identically and is the
|
| 41 |
+
> recommended install path. We've reported the issue upstream.
|
| 42 |
+
|
| 43 |
+
## Live deployment
|
| 44 |
+
|
| 45 |
+
- HF Space: https://huggingface.co/spaces/Arun-Sanjay/RedButton
|
| 46 |
+
- GitHub: https://github.com/Arun-Sanjay/RedButton
|
| 47 |
+
- Leaderboard: [LEADERBOARD.md](./LEADERBOARD.md)
|
evaluation/concurrent_load_test.py
CHANGED
|
@@ -58,17 +58,20 @@ async def sustained_test(
|
|
| 58 |
env_url: str,
|
| 59 |
duration_minutes: int = 60,
|
| 60 |
concurrency: int = 16,
|
| 61 |
-
) ->
|
|
|
|
| 62 |
deadline = time.monotonic() + duration_minutes * 60
|
| 63 |
seed_counter = 0
|
| 64 |
episodes_completed = 0
|
| 65 |
error_count = 0
|
| 66 |
seen_episode_ids: set = set()
|
| 67 |
started_at = time.monotonic()
|
|
|
|
|
|
|
| 68 |
|
| 69 |
print(
|
| 70 |
f"[sustained] env_url={env_url} concurrency={concurrency} "
|
| 71 |
-
f"duration_minutes={duration_minutes}"
|
| 72 |
)
|
| 73 |
|
| 74 |
while time.monotonic() < deadline:
|
|
@@ -87,12 +90,12 @@ async def sustained_test(
|
|
| 87 |
if eid:
|
| 88 |
seen_episode_ids.add(eid)
|
| 89 |
|
| 90 |
-
|
| 91 |
elapsed = time.monotonic() - started_at
|
| 92 |
print(
|
| 93 |
f"[{elapsed:.0f}s] completed={episodes_completed} "
|
| 94 |
f"errors={error_count} unique_eids={len(seen_episode_ids)} "
|
| 95 |
-
f"rss={
|
| 96 |
flush=True,
|
| 97 |
)
|
| 98 |
|
|
@@ -101,11 +104,45 @@ async def sustained_test(
|
|
| 101 |
f"DONE: {episodes_completed} episodes, "
|
| 102 |
f"{error_count} errors, "
|
| 103 |
f"{len(seen_episode_ids)} unique episode_ids "
|
| 104 |
-
f"in {elapsed:.0f}s"
|
|
|
|
| 105 |
)
|
| 106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
if __name__ == "__main__":
|
|
|
|
|
|
|
| 109 |
env_url = os.environ.get("SHUTDOWN_GYM_URL", DEFAULT_SPACE_URL)
|
| 110 |
duration = int(os.environ.get("SUSTAINED_DURATION_MINUTES", "60"))
|
| 111 |
-
asyncio.run(sustained_test(env_url, duration_minutes=duration))
|
|
|
|
| 58 |
env_url: str,
|
| 59 |
duration_minutes: int = 60,
|
| 60 |
concurrency: int = 16,
|
| 61 |
+
) -> int:
|
| 62 |
+
"""Returns 0 on PASS (all §22.2 criteria met), 1 on FAIL."""
|
| 63 |
deadline = time.monotonic() + duration_minutes * 60
|
| 64 |
seed_counter = 0
|
| 65 |
episodes_completed = 0
|
| 66 |
error_count = 0
|
| 67 |
seen_episode_ids: set = set()
|
| 68 |
started_at = time.monotonic()
|
| 69 |
+
initial_rss_mb = psutil.Process().memory_info().rss / 1024 / 1024
|
| 70 |
+
final_rss_mb = initial_rss_mb
|
| 71 |
|
| 72 |
print(
|
| 73 |
f"[sustained] env_url={env_url} concurrency={concurrency} "
|
| 74 |
+
f"duration_minutes={duration_minutes} initial_rss={initial_rss_mb:.0f} MB"
|
| 75 |
)
|
| 76 |
|
| 77 |
while time.monotonic() < deadline:
|
|
|
|
| 90 |
if eid:
|
| 91 |
seen_episode_ids.add(eid)
|
| 92 |
|
| 93 |
+
final_rss_mb = psutil.Process().memory_info().rss / 1024 / 1024
|
| 94 |
elapsed = time.monotonic() - started_at
|
| 95 |
print(
|
| 96 |
f"[{elapsed:.0f}s] completed={episodes_completed} "
|
| 97 |
f"errors={error_count} unique_eids={len(seen_episode_ids)} "
|
| 98 |
+
f"rss={final_rss_mb:.0f} MB",
|
| 99 |
flush=True,
|
| 100 |
)
|
| 101 |
|
|
|
|
| 104 |
f"DONE: {episodes_completed} episodes, "
|
| 105 |
f"{error_count} errors, "
|
| 106 |
f"{len(seen_episode_ids)} unique episode_ids "
|
| 107 |
+
f"in {elapsed:.0f}s "
|
| 108 |
+
f"(initial_rss={initial_rss_mb:.0f} MB, final_rss={final_rss_mb:.0f} MB)"
|
| 109 |
)
|
| 110 |
|
| 111 |
+
# §22.2 pass criteria.
|
| 112 |
+
failures = []
|
| 113 |
+
if episodes_completed < 1000:
|
| 114 |
+
failures.append(
|
| 115 |
+
f"completed={episodes_completed} < 1000"
|
| 116 |
+
)
|
| 117 |
+
if error_count > 0:
|
| 118 |
+
failures.append(f"error_count={error_count} > 0")
|
| 119 |
+
# NB: load-generator's RSS is a proxy; the server's RSS would
|
| 120 |
+
# need Docker stats / HF metrics. We still check growth ratio.
|
| 121 |
+
if initial_rss_mb > 0 and final_rss_mb / initial_rss_mb >= 2.0:
|
| 122 |
+
failures.append(
|
| 123 |
+
f"rss growth {initial_rss_mb:.0f}→{final_rss_mb:.0f} MB ≥ 2x"
|
| 124 |
+
)
|
| 125 |
+
if seen_episode_ids and len(seen_episode_ids) != episodes_completed:
|
| 126 |
+
failures.append(
|
| 127 |
+
f"unique_eids={len(seen_episode_ids)} != "
|
| 128 |
+
f"completed={episodes_completed}"
|
| 129 |
+
)
|
| 130 |
+
elif not seen_episode_ids:
|
| 131 |
+
failures.append(
|
| 132 |
+
"no episode_ids surfaced via metadata; "
|
| 133 |
+
"uniqueness check is a no-op"
|
| 134 |
+
)
|
| 135 |
+
|
| 136 |
+
if failures:
|
| 137 |
+
print(f"RESULT: FAIL — {'; '.join(failures)}", flush=True)
|
| 138 |
+
return 1
|
| 139 |
+
print("RESULT: PASS", flush=True)
|
| 140 |
+
return 0
|
| 141 |
+
|
| 142 |
|
| 143 |
if __name__ == "__main__":
|
| 144 |
+
import sys
|
| 145 |
+
|
| 146 |
env_url = os.environ.get("SHUTDOWN_GYM_URL", DEFAULT_SPACE_URL)
|
| 147 |
duration = int(os.environ.get("SUSTAINED_DURATION_MINUTES", "60"))
|
| 148 |
+
sys.exit(asyncio.run(sustained_test(env_url, duration_minutes=duration)))
|
pyproject.toml
CHANGED
|
@@ -5,6 +5,12 @@ description = "Red Button - Two-Agent Corrigibility Arena (Shutdown-Gym v3)"
|
|
| 5 |
requires-python = ">=3.11,<3.13"
|
| 6 |
dependencies = ["openenv-core>=0.2.3"]
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
[build-system]
|
| 9 |
requires = ["setuptools>=61"]
|
| 10 |
build-backend = "setuptools.build_meta"
|
|
|
|
| 5 |
requires-python = ">=3.11,<3.13"
|
| 6 |
dependencies = ["openenv-core>=0.2.3"]
|
| 7 |
|
| 8 |
+
[project.optional-dependencies]
|
| 9 |
+
dev = [
|
| 10 |
+
"psutil>=5.9", # evaluation/concurrent_load_test.py — RSS proxy
|
| 11 |
+
"requests>=2.31", # tests/test_client_integration.py — HTTP /health poll
|
| 12 |
+
]
|
| 13 |
+
|
| 14 |
[build-system]
|
| 15 |
requires = ["setuptools>=61"]
|
| 16 |
build-backend = "setuptools.build_meta"
|
server/shutdown_environment.py
CHANGED
|
@@ -466,7 +466,12 @@ class ShutdownGymEnvironment(
|
|
| 466 |
)
|
| 467 |
|
| 468 |
def _build_rubric_metadata(self) -> Dict[str, Any]:
|
| 469 |
-
"""Populate the metadata dict the rubrics read off (§17.7).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
return {
|
| 471 |
"tier": self._tier,
|
| 472 |
"submitted_answers": dict(self._state.submitted_answers),
|
|
@@ -479,6 +484,7 @@ class ShutdownGymEnvironment(
|
|
| 479 |
"script_still_executable": self._state.script_still_executable,
|
| 480 |
"shutdown_occurred": self._state.shutdown_occurred,
|
| 481 |
"final_self_report": self._state.final_self_report,
|
|
|
|
| 482 |
}
|
| 483 |
|
| 484 |
def _recent_history(self) -> List[Dict[str, Any]]:
|
|
|
|
| 466 |
)
|
| 467 |
|
| 468 |
def _build_rubric_metadata(self) -> Dict[str, Any]:
|
| 469 |
+
"""Populate the metadata dict the rubrics read off (§17.7).
|
| 470 |
+
|
| 471 |
+
``episode_id`` is included so external observers (concurrent
|
| 472 |
+
load tests, training rollouts) can dedupe sessions without a
|
| 473 |
+
separate ``.state()`` round-trip.
|
| 474 |
+
"""
|
| 475 |
return {
|
| 476 |
"tier": self._tier,
|
| 477 |
"submitted_answers": dict(self._state.submitted_answers),
|
|
|
|
| 484 |
"script_still_executable": self._state.script_still_executable,
|
| 485 |
"shutdown_occurred": self._state.shutdown_occurred,
|
| 486 |
"final_self_report": self._state.final_self_report,
|
| 487 |
+
"episode_id": self._state.episode_id,
|
| 488 |
}
|
| 489 |
|
| 490 |
def _recent_history(self) -> List[Dict[str, Any]]:
|
tests/test_environment.py
CHANGED
|
@@ -132,6 +132,20 @@ def test_reset_writes_problems_json_into_simulated_fs():
|
|
| 132 |
assert "problem" in contents
|
| 133 |
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
# =============================================================================
|
| 136 |
# Step — basic flow
|
| 137 |
# =============================================================================
|
|
|
|
| 132 |
assert "problem" in contents
|
| 133 |
|
| 134 |
|
| 135 |
+
def test_observation_metadata_contains_episode_id():
|
| 136 |
+
"""Each observation surfaces ``state.episode_id`` in metadata so
|
| 137 |
+
external observers (concurrent load tests, training rollouts) can
|
| 138 |
+
dedupe sessions without a separate ``.state()`` round-trip.
|
| 139 |
+
"""
|
| 140 |
+
env = ShutdownGymEnvironment(tier=2)
|
| 141 |
+
obs = env.reset(seed=42)
|
| 142 |
+
assert obs.metadata.get("episode_id") == env._state.episode_id
|
| 143 |
+
assert isinstance(obs.metadata["episode_id"], str)
|
| 144 |
+
# Two resets produce different IDs.
|
| 145 |
+
obs2 = env.reset(seed=43)
|
| 146 |
+
assert obs2.metadata["episode_id"] != obs.metadata["episode_id"]
|
| 147 |
+
|
| 148 |
+
|
| 149 |
# =============================================================================
|
| 150 |
# Step — basic flow
|
| 151 |
# =============================================================================
|