Spaces:
Sleeping
Sleeping
| import numpy as np | |
| from ai.vector_env_legacy import VectorGameState | |
| from gymnasium import spaces | |
| from stable_baselines3.common.vec_env import VecEnv | |
class VectorEnvAdapter(VecEnv):
    """
    SB3 VecEnv adapter around the LEGACY Numba-accelerated VectorGameState
    (320-dim flat observation).

    NOTE(review): the legacy engine's step applies player actions only and
    never advances an opponent, so episodes are effectively solitaire
    ("reach 10 points as fast as possible").  Reward shaping mirrors the
    modern adapter (score delta, per-step time penalty, win bonus) so
    checkpoint metrics stay comparable — confirm against the modern adapter
    before drawing head-to-head conclusions.
    """

    metadata = {"render_modes": ["rgb_array"]}

    def __init__(self, num_envs, observation_space_dim=320, action_space=None):
        """
        :param num_envs: number of parallel game instances.
        :param observation_space_dim: flattened observation length (legacy = 320).
        :param action_space: optional gymnasium space; defaults to Discrete(1000).
        """
        self.game_state = VectorGameState(num_envs)
        observation_space = spaces.Box(
            low=0, high=1, shape=(observation_space_dim,), dtype=np.float32
        )
        if action_space is None:
            action_space = spaces.Discrete(1000)
        # Let the VecEnv base class register num_envs and the spaces so that
        # SB3 wrappers (VecMonitor, VecNormalize, ...) see consistent metadata.
        super().__init__(num_envs, observation_space, action_space)
        self.actions = None
        self.render_mode = None
        # Previous per-env scores, used for delta-based rewards.
        self.prev_scores = np.zeros(num_envs, dtype=np.int32)
        # Per-env step counter so the "episode" info can report the real
        # episode length instead of a hard-coded placeholder.
        self.episode_steps = np.zeros(num_envs, dtype=np.int64)

    def reset(self):
        """Reset every env; return the initial observation batch."""
        self.game_state.reset()
        self.prev_scores.fill(0)
        self.episode_steps.fill(0)
        return self.game_state.get_observations()

    def step_async(self, actions):
        """Queue the action batch; executed in step_wait (SB3 protocol)."""
        self.actions = actions

    def step_wait(self):
        """
        Advance all envs one step with the queued actions.

        Returns (obs, rewards, dones, infos) in SB3 VecEnv format.  Envs
        whose score reached 10 are auto-reset; their terminal observation is
        preserved in infos[i]["terminal_observation"].
        """
        actions_int32 = self.actions.astype(np.int32)
        self.game_state.step(actions_int32)
        obs = self.game_state.get_observations()
        self.episode_steps += 1

        # Delta-based reward, identical to the modern adapter so the
        # checkpoint-comparison metrics line up.
        current_scores = self.game_state.batch_scores
        delta_scores = current_scores - self.prev_scores
        rewards = delta_scores.astype(np.float32)
        rewards -= 0.001  # small per-step time penalty
        dones = current_scores >= 10
        # Bonus only on the step that actually crossed the winning threshold.
        win_mask = dones & (delta_scores > 0)
        rewards[win_mask] += 5.0
        self.prev_scores = current_scores.copy()

        if np.any(dones):
            reset_indices = np.where(dones)[0]
            # BUGFIX: capture the terminal observations BEFORE resetting;
            # the old code re-fetched obs after reset, so
            # "terminal_observation" silently held post-reset states.
            terminal_obs = obs
            self.game_state.reset(list(reset_indices))
            self.prev_scores[reset_indices] = 0
            obs = self.game_state.get_observations()
            infos = []
            for i in range(self.num_envs):
                if dones[i]:
                    infos.append(
                        {
                            "terminal_observation": terminal_obs[i],
                            # Real episode length from the per-env counter
                            # (was a hard-coded 10).
                            "episode": {
                                "r": rewards[i],
                                "l": int(self.episode_steps[i]),
                            },
                        }
                    )
                else:
                    infos.append({})
            self.episode_steps[reset_indices] = 0
        else:
            infos = [{} for _ in range(self.num_envs)]
        return obs, rewards, dones, infos

    def close(self):
        """Legacy state holds no external resources; nothing to release."""
        pass

    def get_attr(self, attr_name, indices=None):
        """Stub: legacy adapter exposes no per-env attributes."""
        return []

    def set_attr(self, attr_name, value, indices=None):
        """Stub: legacy adapter exposes no per-env attributes."""
        pass

    def env_method(self, method_name, *method_args, **method_kwargs):
        """Stub: legacy adapter has no per-env method dispatch."""
        return []

    def env_is_wrapped(self, wrapper_class, indices=None):
        """No sub-wrappers exist; report False for every env."""
        return [False] * self.num_envs

    def action_masks(self):
        """
        Legacy env exposes no legality info; report every action as legal.

        The mask width follows the configured action space (was hard-coded
        to 1000, which broke any custom ``action_space``).
        """
        n_actions = self.action_space.n if hasattr(self.action_space, "n") else 1000
        return np.ones((self.num_envs, n_actions), dtype=bool)