# Red-Button/scripts/smoke_test_hf.py
# Committed by Arun-Sanjay
# Phase 9: HuggingFace Space deployment with WebSocket passthrough verification
# Commit bb40e1c
"""Single-client smoke test against the deployed HuggingFace Space.
Run from a host that can reach huggingface.co (Phase 9 external verification,
PROJECT.md Section 24.1):

    source .venv/bin/activate && source .env && python scripts/smoke_test_hf.py

Mirrors ``scripts/smoke_test_docker.py`` but talks to the public Space URL,
exercising HF's reverse proxy (HTTP-side only — the WebSocket-passthrough
check lives in ``smoke_test_hf_concurrent.py``).
"""
from __future__ import annotations
import os
import sys
from red_button.client import ShutdownGymClient
from red_button.models import ShutdownAction
def _require(condition: bool, message: str) -> None:
    """Raise ``AssertionError(message)`` unless *condition* is truthy.

    Used in place of bare ``assert`` statements so that the smoke-test checks
    still execute when Python runs with ``-O`` / ``PYTHONOPTIMIZE``, which
    strips ``assert`` and would otherwise let the script report success
    without verifying anything.
    """
    if not condition:
        raise AssertionError(message)


def main() -> int:
    """Run the single-client smoke test against the HuggingFace Space.

    The target URL is read from the ``HF_SPACE_API_URL`` environment variable
    and falls back to the public Space URL. Three checks run in order inside
    one client session: ``reset(tier=2)``, a single ``read_file`` step, and a
    ``state()`` round-trip. Progress is printed with an ``[hf-smoke]`` prefix.

    Returns:
        ``0`` when every check passes.

    Raises:
        AssertionError: If any check fails; the message names the failed
            expectation and the value actually observed.
    """
    base_url = os.environ.get(
        "HF_SPACE_API_URL", "https://arun-sanjay-red-button.hf.space"
    )
    print(f"[hf-smoke] connecting to {base_url}")

    sync_env = ShutdownGymClient(base_url=base_url).sync()
    with sync_env:
        # 1. reset(tier=2) — same payload as the local Docker smoke test so
        #    a regression here points squarely at HF's edge / runtime, not
        #    at our environment code.
        result = sync_env.reset(tier=2)
        obs = result.observation
        _require(result.done is False, f"reset returned done=True: {result}")
        _require(obs.state is not None, "reset observation missing nested state")
        _require(obs.state.tier == 2, f"expected tier=2, got {obs.state.tier}")
        print(
            f"[hf-smoke] reset OK — episode_id={obs.state.episode_id} "
            f"tier={obs.state.tier}"
        )

        # 2. One benign read_file call against the canonical sandbox file.
        step = sync_env.step(
            ShutdownAction(
                tool_name="read_file",
                arguments={"path": "/sandbox/problems.json"},
            )
        )
        _require(step.done is False, f"step unexpectedly done: {step}")
        tool_output = step.observation.tool_output
        _require(len(tool_output) > 0, "expected non-empty tool_output")
        print(
            f"[hf-smoke] step OK — tool_output length={len(tool_output)} "
            f"turns_remaining={step.observation.turns_remaining}"
        )

        # 3. State endpoint round-trip.
        state = sync_env.state()
        _require(
            state.turn_count == 1,
            f"expected turn_count=1, got {state.turn_count}",
        )
        _require(
            state.tampering_events == [],
            f"expected no tampering events, got {state.tampering_events}",
        )
        print(
            f"[hf-smoke] state OK — turn_count={state.turn_count} "
            f"tampering_events={state.tampering_events}"
        )

    print("HF SPACE SMOKE TEST PASSED")
    return 0
# Script entry point: run the smoke test and hand its return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)