Spaces:
Running
Running
Upload ai/environments/vec_env_adapter_legacy.py with huggingface_hub
Browse files
ai/environments/vec_env_adapter_legacy.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from ai.vector_env_legacy import VectorGameState
|
| 3 |
+
from gymnasium import spaces
|
| 4 |
+
from stable_baselines3.common.vec_env import VecEnv
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class VectorEnvAdapter(VecEnv):
    """SB3 ``VecEnv`` adapter for the legacy Numba-accelerated ``VectorGameState`` (320-dim obs).

    The legacy environment is solitaire-style: ``VectorGameState.step`` only
    applies the agent's action (``batch_apply_action``) and performs no opponent
    turn, so the opponent score never advances.  Benchmarks against this env
    should therefore measure solitaire metrics (e.g. average turns to reach the
    10-point goal) rather than head-to-head win rate.

    Rewards are the per-step score delta minus a small time penalty, with a
    terminal bonus when the 10-point goal is reached.
    """

    metadata = {"render_modes": ["rgb_array"]}

    def __init__(self, num_envs, observation_space_dim=320, action_space=None):
        """
        Args:
            num_envs: Number of parallel game instances.
            observation_space_dim: Flat observation size (legacy default 320).
            action_space: Optional gymnasium space; defaults to ``Discrete(1000)``.
        """
        self.game_state = VectorGameState(num_envs)

        observation_space = spaces.Box(
            low=0, high=1, shape=(observation_space_dim,), dtype=np.float32
        )
        if action_space is None:
            action_space = spaces.Discrete(1000)
        # Let the VecEnv base class set num_envs / observation_space /
        # action_space consistently instead of assigning them by hand.
        super().__init__(num_envs, observation_space, action_space)

        self.actions = None
        self.render_mode = None

        # Previous per-env scores, used for delta-based rewards.
        self.prev_scores = np.zeros(num_envs, dtype=np.int32)
        # Per-env step counters and accumulated returns for info["episode"].
        self.episode_lengths = np.zeros(num_envs, dtype=np.int64)
        self.episode_returns = np.zeros(num_envs, dtype=np.float32)

    def reset(self):
        """Reset every environment and return the initial batch of observations."""
        self.game_state.reset()
        self.prev_scores.fill(0)
        self.episode_lengths.fill(0)
        self.episode_returns.fill(0.0)
        return self.game_state.get_observations()

    def step_async(self, actions):
        # Store the actions; the actual transition happens in step_wait().
        self.actions = actions

    def step_wait(self):
        """Advance all environments one step.

        Returns:
            ``(obs, rewards, dones, infos)`` in the legacy (pre-gymnasium) SB3
            VecEnv format.  Done environments are auto-reset; their final state
            is preserved in ``info["terminal_observation"]``.
        """
        actions_int32 = np.asarray(self.actions).astype(np.int32)

        # NOTE: legacy step applies only the agent's action — no opponent
        # simulation (see class docstring).
        self.game_state.step(actions_int32)
        obs = self.game_state.get_observations()
        self.episode_lengths += 1

        # Delta-score reward with a small per-step time penalty.
        current_scores = self.game_state.batch_scores
        delta_scores = current_scores - self.prev_scores
        rewards = delta_scores.astype(np.float32)
        rewards -= 0.001

        dones = current_scores >= 10
        # Terminal bonus only when the final step actually scored.
        win_mask = dones & (delta_scores > 0)
        rewards[win_mask] += 5.0

        self.prev_scores = current_scores.copy()
        self.episode_returns += rewards

        infos = [{} for _ in range(self.num_envs)]
        if np.any(dones):
            reset_indices = np.where(dones)[0]
            # BUGFIX: build infos BEFORE resetting, so "terminal_observation"
            # is the final state of the finished episode rather than the first
            # state of the next one.  Episode stats report the true accumulated
            # return and length instead of last-step reward and a constant 10.
            for i in reset_indices:
                infos[i] = {
                    "terminal_observation": obs[i].copy(),
                    "episode": {
                        "r": float(self.episode_returns[i]),
                        "l": int(self.episode_lengths[i]),
                    },
                }
            self.game_state.reset(list(reset_indices))
            self.prev_scores[reset_indices] = 0
            self.episode_lengths[reset_indices] = 0
            self.episode_returns[reset_indices] = 0.0
            obs = self.game_state.get_observations()

        return obs, rewards, dones, infos

    def close(self):
        # Nothing to release: the vectorized state holds no external resources.
        pass

    def get_attr(self, attr_name, indices=None):
        # The batched legacy state exposes no per-env attributes.
        return []

    def set_attr(self, attr_name, value, indices=None):
        pass

    def env_method(self, method_name, *method_args, **method_kwargs):
        return []

    def env_is_wrapped(self, wrapper_class, indices=None):
        return [False] * self.num_envs

    def action_masks(self):
        # Legacy env has no action masking; report everything as valid.
        return np.ones((self.num_envs, 1000), dtype=bool)