| |
| """Smoke test: download TIL repo, verify imports, run 100 steps, push dummy checkpoint.""" |
| import os, sys |
|
|
# Opening banner for the job log.
BANNER = "=" * 60
print(BANNER)
print("SMOKE TEST: HF Job private repo access + training basics")
print(BANNER)
|
|
| |
print("\n[1/5] Downloading TIL repo via snapshot_download...")
from huggingface_hub import snapshot_download
# Pull only the environment package from the (private) Space repo; this also
# exercises authenticated access to a private repo, which is half the point
# of the smoke test.
# NOTE(review): allow_patterns keeps only `til_environment/**` -- if step 2's
# `pip install -e .` needs a top-level pyproject.toml / setup.py, those files
# would be excluded here. Confirm against the actual Space layout.
# NOTE(review): files land directly under local_dir (/app/til-26-ae-repo),
# but steps 2-3 look under /app/til-26-ae-repo/til-26-ae -- verify the Space
# nests its content in a `til-26-ae` folder, otherwise the paths disagree.
snapshot_download(
    repo_id="e-rong/til-26-ae",
    repo_type="space",
    local_dir="/app/til-26-ae-repo",
    allow_patterns=["til_environment/**"],
)
print(" β Downloaded")
|
|
| |
print("\n[2/5] Installing TIL environment...")
import subprocess
# Editable-install the downloaded package. Invoke pip as `python -m pip` with
# the *current* interpreter (sys is imported at the top of the file) so the
# package lands in the same environment this script runs in -- a bare "pip"
# on PATH can belong to a different Python on multi-interpreter images.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-e", "."],
    cwd="/app/til-26-ae-repo/til-26-ae",
    check=True,  # fail the smoke test immediately if the install fails
)
print(" β Installed")
|
|
| |
print("\n[3/5] Verifying imports...")
# Make the downloaded package importable even if the editable install in
# step 2 placed nothing on the default path.
sys.path.insert(0, "/app/til-26-ae-repo/til-26-ae")
from til_environment.bomberman_env import Bomberman
from til_environment.config import default_config
# aec_to_parallel converts the turn-based (AEC) PettingZoo env into the
# simultaneous-step parallel API used by the wrapper below.
from pettingzoo.utils.conversions import aec_to_parallel
print(" β Imports OK")
|
|
| |
print("\n[4/5] Running 100 training steps...")
# Training stack: MaskablePPO consumes the per-step action mask exposed via
# the ActionMasker wrapper; Monitor records episode statistics.
from sb3_contrib import MaskablePPO
from sb3_contrib.common.wrappers import ActionMasker
from stable_baselines3.common.monitor import Monitor
import gymnasium
from gymnasium.spaces import Box, Discrete
import numpy as np
|
|
class QuickEnv(gymnasium.Env):
    """Single-agent Gymnasium view of the multi-agent Bomberman environment.

    Controls ``agent_0`` while every other agent takes a uniformly random
    *valid* action (per its own action mask). Observations are flattened into
    a single float32 vector; ``action_masks()`` exposes the current mask for
    MaskablePPO via the ActionMasker wrapper.
    """

    def __init__(self):
        super().__init__()
        cfg = default_config()
        cfg.env.render_mode = None  # headless: no rendering inside the job
        raw = Bomberman(cfg)
        self._parallel_env = aec_to_parallel(raw)
        self.agent_id = "agent_0"
        self._episode_count = 0
        self.action_space = Discrete(6)
        # Agent viewcone: (behind+ahead+1) x (left+right+1) cells with 25
        # features per cell -- presumably the env's per-cell channel count;
        # TODO confirm against the til_environment docs.
        vl = int(cfg.dynamics.vision.behind) + int(cfg.dynamics.vision.ahead) + 1
        vw = int(cfg.dynamics.vision.left) + int(cfg.dynamics.vision.right) + 1
        av = vl * vw * 25
        # Base viewcone: square of side (2*radius + 1), same 25 channels.
        br = int(cfg.entities.base.vision_radius)
        bs = 2 * br + 1
        bv = bs * bs * 25
        # +11 scalar features appended by _flatten (direction, location,
        # base_location, health, frozen_ticks, base_health, team_resources,
        # team_bombs, step) -- assumes those fields total 11 values; the
        # exact split depends on each field's shape (see _flatten).
        self._obs_size = av + bv + 11
        self.observation_space = Box(
            low=-np.inf, high=np.inf, shape=(self._obs_size,), dtype=np.float32
        )
        self._last_action_mask = None
        self._last_obs_dict = None

    def reset(self, seed=None, options=None):
        """Reset the underlying parallel env and return (obs, info).

        Honors an explicitly requested ``seed`` (Gymnasium contract);
        otherwise seeds with the episode counter so successive resets do not
        replay the identical match. The original ignored ``seed`` entirely.
        """
        self._episode_count += 1
        reset_seed = seed if seed is not None else self._episode_count
        obs_dict, _ = self._parallel_env.reset(seed=reset_seed, options=options)
        self._last_obs_dict = obs_dict
        self._last_action_mask = obs_dict[self.agent_id]["action_mask"].astype(bool)
        return self._flatten(obs_dict[self.agent_id]), {}

    def step(self, action):
        """Advance one step: our action plus random valid opponent actions."""
        # Cast to a plain int for consistency with the opponents' actions
        # (the learner may hand us a 0-d numpy array).
        actions = {self.agent_id: int(action)}
        for aid, obs in self._last_obs_dict.items():
            if aid != self.agent_id:
                valid = np.where(obs["action_mask"] == 1)[0]
                actions[aid] = int(np.random.choice(valid)) if len(valid) > 0 else 0
        obs_dict, rewards, terminations, truncations, infos = self._parallel_env.step(actions)
        self._last_obs_dict = obs_dict
        if self.agent_id not in obs_dict:
            # Our agent was removed from the env mid-step (eliminated):
            # report termination with a zero observation.
            return np.zeros(self._obs_size, dtype=np.float32), 0.0, True, False, {}
        self._last_action_mask = obs_dict[self.agent_id]["action_mask"].astype(bool)
        obs = self._flatten(obs_dict[self.agent_id])
        r = float(rewards.get(self.agent_id, 0.0))
        # Report termination and truncation separately per the Gymnasium API.
        # The original folded truncation into `terminated`, which makes the
        # learner treat time-limit episode ends as true terminal states
        # (wrong bootstrapping target).
        terminated = bool(terminations.get(self.agent_id, False))
        truncated = bool(truncations.get(self.agent_id, False))
        return obs, r, terminated, truncated, infos.get(self.agent_id, {})

    def action_masks(self):
        """Current valid-action mask for the learning agent (MaskablePPO hook)."""
        return self._last_action_mask

    def _flatten(self, od):
        """Concatenate all observation-dict fields into one float32 vector."""
        return np.concatenate([
            od["agent_viewcone"].flatten(), od["base_viewcone"].flatten(),
            np.array([od["direction"]], dtype=np.float32),
            od["location"].flatten().astype(np.float32),
            od["base_location"].flatten().astype(np.float32),
            od["health"].flatten().astype(np.float32),
            np.array([od["frozen_ticks"]], dtype=np.float32),
            od["base_health"].flatten().astype(np.float32),
            od["team_resources"].flatten().astype(np.float32),
            np.array([od["team_bombs"]], dtype=np.float32),
            np.array([od["step"]], dtype=np.float32),
        ], dtype=np.float32)
|
|
def _mask_fn(wrapped_env):
    """Expose the env's current action mask to MaskablePPO."""
    return wrapped_env.action_masks()


# Mask-aware env, wrapped in Monitor for episode stats.
env = Monitor(ActionMasker(QuickEnv(), _mask_fn))

model = MaskablePPO(
    "MlpPolicy",
    env,
    learning_rate=3e-4,
    n_steps=128,
    batch_size=32,
    n_epochs=2,
    gamma=0.99,
    clip_range=0.2,
    ent_coef=0.01,
    verbose=0,
    device="cuda",  # smoke test targets a GPU job; fails fast if CUDA is absent
)
model.learn(total_timesteps=100, progress_bar=False)
print(" β 100 steps completed")
|
|
| |
print("\n[5/5] Pushing dummy checkpoint to Hub...")
from huggingface_hub import HfApi

# Save the checkpoint locally, then upload the archive to the model repo to
# prove the job has write access.
checkpoint_path = "/app/smoke_test_ckpt.zip"
model.save(checkpoint_path)
api = HfApi()
api.upload_file(
    path_or_fileobj=checkpoint_path,
    path_in_repo="smoke_test_ckpt.zip",
    repo_id="E-Rong/til-26-ae-agent",
    repo_type="model",
)
print(" β Pushed to Hub")
|
|
# Closing banner: everything above succeeded.
separator = "=" * 60
print(f"\n{separator}")
print("SMOKE TEST PASSED β Ready for full training job")
print(separator)
|
|