trioskosmos committed on
Commit
8c0b3c9
·
verified ·
1 Parent(s): c1209b4

Upload ai/environments/vec_env_adapter_legacy.py with huggingface_hub

Browse files
ai/environments/vec_env_adapter_legacy.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from ai.vector_env_legacy import VectorGameState
3
+ from gymnasium import spaces
4
+ from stable_baselines3.common.vec_env import VecEnv
5
+
6
+
7
class VectorEnvAdapter(VecEnv):
    """
    SB3-compatible adapter around the LEGACY Numba-accelerated
    VectorGameState (320-dim flat observations).

    NOTE(review): the legacy ``VectorGameState.step`` applies only the
    agent's actions and does not simulate an opponent, so the legacy
    environment is effectively single-player ("solitaire") — confirm
    against vector_env_legacy.py. A meaningful benchmark for legacy
    checkpoints is therefore "average turns to reach 10 points".
    """

    metadata = {"render_modes": ["rgb_array"]}

    def __init__(self, num_envs, observation_space_dim=320, action_space=None):
        """
        :param num_envs: number of parallel game instances.
        :param observation_space_dim: flat observation size (legacy default 320).
        :param action_space: optional gymnasium space; defaults to Discrete(1000).
        """
        self.num_envs = num_envs
        self.game_state = VectorGameState(num_envs)
        # Observation space size is kept flexible so older/newer legacy
        # checkpoints with different encodings can be loaded.
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(observation_space_dim,), dtype=np.float32
        )
        self.action_space = action_space if action_space is not None else spaces.Discrete(1000)
        self.actions = None
        self.render_mode = None

        # Previous per-env scores, used to compute delta-based rewards.
        self.prev_scores = np.zeros(num_envs, dtype=np.int32)

    def reset(self):
        """Reset every sub-environment and return the initial observations."""
        self.game_state.reset()
        self.prev_scores.fill(0)
        return self.game_state.get_observations()

    def step_async(self, actions):
        # Store actions; the actual work happens in step_wait().
        self.actions = actions

    def step_wait(self):
        """
        Apply the stored actions, compute rewards/dones, and auto-reset
        finished sub-environments (SB3 VecEnv contract).

        Reward shaping (kept identical to the modern adapter so checkpoint
        metrics are comparable): +1 per point scored, -0.001 per step,
        +5 bonus on the winning step.
        """
        actions_int32 = self.actions.astype(np.int32)

        self.game_state.step(actions_int32)
        obs = self.game_state.get_observations()

        current_scores = self.game_state.batch_scores
        delta_scores = current_scores - self.prev_scores
        rewards = delta_scores.astype(np.float32)
        rewards -= 0.001  # small per-step penalty to encourage fast play

        dones = current_scores >= 10
        # Bonus only on the step that actually scored the winning point(s).
        win_mask = dones & (delta_scores > 0)
        rewards[win_mask] += 5.0

        self.prev_scores = current_scores.copy()

        if np.any(dones):
            reset_indices = np.where(dones)[0]
            # BUGFIX: capture terminal observations *before* resetting.
            # Previously the post-reset observation was reported as
            # "terminal_observation", which corrupts value bootstrapping
            # in SB3 algorithms that read it.
            terminal_obs = obs.copy()
            self.game_state.reset(list(reset_indices))
            self.prev_scores[reset_indices] = 0
            obs = self.game_state.get_observations()
            infos = []
            for i in range(self.num_envs):
                if dones[i]:
                    # NOTE(review): "l": 10 reports the score threshold,
                    # not the true episode length — kept as-is for
                    # backward compatibility with existing logging.
                    infos.append({
                        "terminal_observation": terminal_obs[i],
                        "episode": {"r": rewards[i], "l": 10},
                    })
                else:
                    infos.append({})
        else:
            infos = [{} for _ in range(self.num_envs)]

        return obs, rewards, dones, infos

    def close(self):
        """Nothing to release: the legacy state lives in process memory."""
        pass

    def get_attr(self, attr_name, indices=None):
        # Sub-environments are not real gym envs; nothing to fetch.
        return []

    def set_attr(self, attr_name, value, indices=None):
        # No per-env attributes exist on the vectorized state.
        pass

    def env_method(self, method_name, *method_args, **method_kwargs):
        # The legacy vectorized state exposes no per-env methods.
        return []

    def env_is_wrapped(self, wrapper_class, indices=None):
        return [False] * self.num_envs

    def action_masks(self):
        # Legacy env has no action masks; report every action as valid.
        # NOTE(review): width is hard-coded to 1000 and may disagree with a
        # custom action_space passed to __init__ — verify callers.
        return np.ones((self.num_envs, 1000), dtype=bool)