#!/usr/bin/env python3
"""Smoke test: download TIL repo, verify imports, run 100 steps, push dummy checkpoint.

The script runs five sequential stages and aborts with an exception on the
first failure:

1. Download the ``til_environment`` package from a private Hugging Face Space.
2. ``pip install -e .`` the downloaded environment.
3. Import the environment, its config and the PettingZoo conversion helper.
4. Train a ``MaskablePPO`` model for a handful of steps on a single-agent
   wrapper (``QuickEnv``) around the multi-agent environment.
5. Save the model and upload the checkpoint to a Hub model repo.

Third-party imports are deliberately performed inside the stage that first
needs them, so that an import failure is attributed to the right stage.
"""
import os
import subprocess
import sys

print("=" * 60)
print("SMOKE TEST: HF Job private repo access + training basics")
print("=" * 60)

# 1. Test snapshot_download of private Space
print("\n[1/5] Downloading TIL repo via snapshot_download...")
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="e-rong/til-26-ae",
    repo_type="space",
    local_dir="/app/til-26-ae-repo",
    allow_patterns=["til_environment/**"],
)
print(" ✓ Downloaded")

# 2. Install TIL environment
print("\n[2/5] Installing TIL environment...")
# NOTE(review): the download above targets /app/til-26-ae-repo and only keeps
# files matching "til_environment/**", while the install below runs in the
# "til-26-ae" sub-directory and needs packaging metadata (setup.py /
# pyproject.toml). Confirm against the Space layout that this directory and
# its metadata actually exist after the filtered download.
#
# Invoke pip through the running interpreter so the package is installed into
# the same environment this script imports from.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-e", "."],
    cwd="/app/til-26-ae-repo/til-26-ae",
    check=True,
)
print(" ✓ Installed")

# 3. Verify imports
print("\n[3/5] Verifying imports...")
sys.path.insert(0, "/app/til-26-ae-repo/til-26-ae")
from til_environment.bomberman_env import Bomberman
from til_environment.config import default_config
from pettingzoo.utils.conversions import aec_to_parallel

print(" ✓ Imports OK")

# 4. Run 100 steps of dummy training
print("\n[4/5] Running 100 training steps...")
from sb3_contrib import MaskablePPO
from sb3_contrib.common.wrappers import ActionMasker
from stable_baselines3.common.monitor import Monitor
import gymnasium
from gymnasium.spaces import Box, Discrete
import numpy as np


class QuickEnv(gymnasium.Env):
    """Single-agent Gymnasium view of the multi-agent Bomberman environment.

    The learner controls ``agent_0``; every other agent present in the last
    observation dict plays a uniformly random action drawn from its own
    action mask. Observations are flattened into one ``float32`` vector.
    """

    def __init__(self):
        super().__init__()
        cfg = default_config()
        cfg.env.render_mode = None
        raw = Bomberman(cfg)
        self._parallel_env = aec_to_parallel(raw)
        self.agent_id = "agent_0"
        self._episode_count = 0
        self.action_space = Discrete(6)

        # Flattened observation size: agent view cone + base view cone + 11
        # trailing values.
        # NOTE(review): the factor 25 is presumably the per-cell channel count
        # and 11 the total length of the non-viewcone fields concatenated in
        # _flatten — confirm against the environment's observation space.
        vl = int(cfg.dynamics.vision.behind) + int(cfg.dynamics.vision.ahead) + 1
        vw = int(cfg.dynamics.vision.left) + int(cfg.dynamics.vision.right) + 1
        av = vl * vw * 25
        br = int(cfg.entities.base.vision_radius)
        bs = 2 * br + 1
        bv = bs * bs * 25
        self._obs_size = av + bv + 11
        self.observation_space = Box(
            low=-np.inf,
            high=np.inf,
            shape=(self._obs_size,),
            dtype=np.float32,
        )

        # Cached from the most recent reset()/step() for action masking and
        # for choosing the other agents' actions.
        self._last_action_mask = None
        self._last_obs_dict = None

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(flat_observation, {})``.

        The ``seed`` argument is not forwarded; the underlying environment is
        seeded with a running episode counter instead, so each episode of a
        run gets a different but reproducible seed.
        """
        self._episode_count += 1
        obs_dict, _ = self._parallel_env.reset(
            seed=self._episode_count, options=options
        )
        self._last_obs_dict = obs_dict
        self._last_action_mask = obs_dict[self.agent_id]["action_mask"].astype(bool)
        return self._flatten(obs_dict[self.agent_id]), {}

    def step(self, action):
        """Advance one step and return the Gymnasium 5-tuple.

        Returns ``(obs, reward, terminated, truncated, info)`` for the learner
        agent. If the learner is absent from the new observation dict, a
        zero observation with ``terminated=True`` is returned.
        """
        actions = {self.agent_id: action}
        for aid, obs in self._last_obs_dict.items():
            if aid == self.agent_id:
                continue
            # Other agents act randomly among their currently valid actions,
            # falling back to action 0 when the mask allows nothing.
            valid = np.where(obs["action_mask"] == 1)[0]
            actions[aid] = int(np.random.choice(valid)) if len(valid) > 0 else 0

        obs_dict, rewards, terminations, truncations, infos = (
            self._parallel_env.step(actions)
        )
        self._last_obs_dict = obs_dict

        if self.agent_id not in obs_dict:
            return np.zeros(self._obs_size, dtype=np.float32), 0.0, True, False, {}

        self._last_action_mask = obs_dict[self.agent_id]["action_mask"].astype(bool)
        obs = self._flatten(obs_dict[self.agent_id])
        r = float(rewards.get(self.agent_id, 0.0))
        # Report termination and truncation separately, as the Gymnasium step
        # API requires, instead of folding truncation into `terminated`.
        terminated = bool(terminations.get(self.agent_id, False))
        truncated = bool(truncations.get(self.agent_id, False))
        return obs, r, terminated, truncated, infos.get(self.agent_id, {})

    def action_masks(self):
        """Return the boolean action mask cached by the last reset()/step()."""
        return self._last_action_mask

    def _flatten(self, od):
        """Concatenate one agent's observation dict into a flat float32 vector."""
        return np.concatenate(
            [
                od["agent_viewcone"].flatten(),
                od["base_viewcone"].flatten(),
                np.array([od["direction"]], dtype=np.float32),
                od["location"].flatten().astype(np.float32),
                od["base_location"].flatten().astype(np.float32),
                od["health"].flatten().astype(np.float32),
                np.array([od["frozen_ticks"]], dtype=np.float32),
                od["base_health"].flatten().astype(np.float32),
                od["team_resources"].flatten().astype(np.float32),
                np.array([od["team_bombs"]], dtype=np.float32),
                np.array([od["step"]], dtype=np.float32),
            ],
            dtype=np.float32,
        )


env = ActionMasker(QuickEnv(), lambda e: e.action_masks())
env = Monitor(env)
model = MaskablePPO(
    "MlpPolicy",
    env,
    learning_rate=3e-4,
    n_steps=128,
    batch_size=32,
    n_epochs=2,
    gamma=0.99,
    clip_range=0.2,
    ent_coef=0.01,
    verbose=0,
    device="cuda",
)
# NOTE(review): total_timesteps (100) is below n_steps (128); the learner
# presumably still collects one full rollout before stopping — confirm if the
# exact step count matters.
model.learn(total_timesteps=100, progress_bar=False)
print(" ✓ 100 steps completed")

# 5. Push dummy checkpoint to Hub
print("\n[5/5] Pushing dummy checkpoint to Hub...")
from huggingface_hub import HfApi

model.save("/app/smoke_test_ckpt.zip")
HfApi().upload_file(
    path_or_fileobj="/app/smoke_test_ckpt.zip",
    path_in_repo="smoke_test_ckpt.zip",
    repo_id="E-Rong/til-26-ae-agent",
    repo_type="model",
)
print(" ✓ Pushed to Hub")

print("\n" + "=" * 60)
print("SMOKE TEST PASSED — Ready for full training job")
print("=" * 60)