# LovecaSim — ai/environments/vec_env_adapter_legacy.py
# Uploaded by trioskosmos via huggingface_hub (commit 8c0b3c9, verified)
import numpy as np
from ai.vector_env_legacy import VectorGameState
from gymnasium import spaces
from stable_baselines3.common.vec_env import VecEnv
class VectorEnvAdapter(VecEnv):
    """
    SB3 VecEnv wrapper around the LEGACY Numba-accelerated VectorGameState
    (flat 320-dim observations by default).

    NOTE(review): the legacy step does not appear to simulate an opponent
    internally (no step_opponent_vectorized call), so this is effectively a
    solitaire benchmark ("average turns to reach 10 points"). Verify against
    ai/vector_env_legacy.py before comparing metrics with the modern adapter.
    """
    metadata = {"render_modes": ["rgb_array"]}

    def __init__(self, num_envs, observation_space_dim=320, action_space=None):
        """
        :param num_envs: number of parallel game instances.
        :param observation_space_dim: flat observation size (legacy default 320).
        :param action_space: optional gymnasium space; defaults to Discrete(1000).
        """
        self.game_state = VectorGameState(num_envs)
        # Observation space size is flexible for legacy checkpoints.
        observation_space = spaces.Box(
            low=0, high=1, shape=(observation_space_dim,), dtype=np.float32
        )
        if action_space is None:
            action_space = spaces.Discrete(1000)
        # BUGFIX: initialize the VecEnv base class. This sets num_envs /
        # observation_space / action_space and the internal seed/option state
        # that VecEnv.seed() and SB3 wrappers rely on; the original skipped it.
        super().__init__(num_envs, observation_space, action_space)
        self.actions = None
        self.render_mode = None
        # Previous scores for delta-based rewards.
        self.prev_scores = np.zeros(num_envs, dtype=np.int32)
        # Per-env running episode statistics for the "episode" info dict.
        self.episode_returns = np.zeros(num_envs, dtype=np.float32)
        self.episode_lengths = np.zeros(num_envs, dtype=np.int64)

    def reset(self):
        """Reset every environment and return the initial observations."""
        self.game_state.reset()
        self.prev_scores.fill(0)
        self.episode_returns.fill(0.0)
        self.episode_lengths.fill(0)
        return self.game_state.get_observations()

    def step_async(self, actions):
        # Stash actions; the actual work happens in step_wait().
        self.actions = actions

    def step_wait(self):
        """
        Advance all envs one step.

        Reward = score delta - 0.001 per-step time penalty, +5.0 bonus on the
        step that reaches the 10-point goal. Done envs are auto-reset; their
        final observation is preserved in infos["terminal_observation"].

        :return: (obs, rewards, dones, infos) in the SB3 VecEnv convention.
        """
        actions_int32 = self.actions.astype(np.int32)
        self.game_state.step(actions_int32)
        obs = self.game_state.get_observations()

        # Delta-based reward (same shape of logic as the modern adapter so
        # metric comparisons stay fair), minus a small time penalty.
        current_scores = self.game_state.batch_scores
        delta_scores = current_scores - self.prev_scores
        rewards = delta_scores.astype(np.float32)
        rewards -= 0.001

        dones = current_scores >= 10
        # Bonus only on the step that actually scored the winning points.
        win_mask = dones & (delta_scores > 0)
        rewards[win_mask] += 5.0

        self.prev_scores = current_scores.copy()
        self.episode_returns += rewards
        self.episode_lengths += 1

        if np.any(dones):
            reset_indices = np.where(dones)[0]
            # BUGFIX: capture terminal observations BEFORE resetting, so
            # "terminal_observation" is the true final state. The original
            # re-fetched obs after the reset and handed out post-reset frames.
            terminal_obs = {int(i): obs[i].copy() for i in reset_indices}
            self.game_state.reset(list(reset_indices))
            obs = self.game_state.get_observations()
            infos = []
            for i in range(self.num_envs):
                if dones[i]:
                    # BUGFIX: report the true episode return/length instead of
                    # the last-step reward and a hard-coded length of 10.
                    infos.append({
                        "terminal_observation": terminal_obs[i],
                        "episode": {
                            "r": float(self.episode_returns[i]),
                            "l": int(self.episode_lengths[i]),
                        },
                    })
                else:
                    infos.append({})
            self.prev_scores[reset_indices] = 0
            self.episode_returns[reset_indices] = 0.0
            self.episode_lengths[reset_indices] = 0
        else:
            infos = [{} for _ in range(self.num_envs)]

        return obs, rewards, dones, infos

    def close(self):
        """Nothing to release: the legacy state is pure in-process arrays."""
        pass

    def get_attr(self, attr_name, indices=None):
        """Not supported by the vectorized legacy backend."""
        return []

    def set_attr(self, attr_name, value, indices=None):
        """Not supported by the vectorized legacy backend."""
        pass

    def env_method(self, method_name, *method_args, **method_kwargs):
        """Not supported by the vectorized legacy backend."""
        return []

    def env_is_wrapped(self, wrapper_class, indices=None):
        """No per-env wrappers exist in the vectorized backend."""
        return [False] * self.num_envs

    def action_masks(self):
        """Legacy env has no action masking; every action is allowed.

        BUGFIX: size the mask from the actual action space instead of a
        hard-coded 1000, so custom action spaces get a matching mask.
        """
        n_actions = getattr(self.action_space, "n", 1000)
        return np.ones((self.num_envs, int(n_actions)), dtype=bool)