# LovecaSim — ai/environments/vec_env_adapter_legacy.py
# Uploaded by trioskosmos via huggingface_hub (commit 8c0b3c9, verified)
import numpy as np
from ai.vector_env_legacy import VectorGameState
from gymnasium import spaces
from stable_baselines3.common.vec_env import VecEnv
class VectorEnvAdapter(VecEnv):
    """
    SB3 VecEnv wrapper around the LEGACY Numba-accelerated VectorGameState
    (flat 320-dim observations by default).

    NOTE(review): the legacy step does not appear to simulate an opponent
    internally (no step_opponent_vectorized call), so this is effectively a
    solitaire benchmark ("average turns to reach 10 points"). Verify against
    ai/vector_env_legacy.py before comparing metrics with the modern adapter.
    """
    metadata = {"render_modes": ["rgb_array"]}

    def __init__(self, num_envs, observation_space_dim=320, action_space=None):
        """
        :param num_envs: number of parallel game instances.
        :param observation_space_dim: flat observation size (legacy default 320).
        :param action_space: optional gymnasium space; defaults to Discrete(1000).
        """
        self.game_state = VectorGameState(num_envs)
        # Observation space size is flexible for legacy checkpoints.
        observation_space = spaces.Box(
            low=0, high=1, shape=(observation_space_dim,), dtype=np.float32
        )
        if action_space is None:
            action_space = spaces.Discrete(1000)
        # BUGFIX: initialize the VecEnv base class. This sets num_envs /
        # observation_space / action_space and the internal seed/option state
        # that VecEnv.seed() and SB3 wrappers rely on; the original skipped it.
        super().__init__(num_envs, observation_space, action_space)
        self.actions = None
        self.render_mode = None
        # Previous scores for delta-based rewards.
        self.prev_scores = np.zeros(num_envs, dtype=np.int32)
        # Per-env running episode statistics for the "episode" info dict.
        self.episode_returns = np.zeros(num_envs, dtype=np.float32)
        self.episode_lengths = np.zeros(num_envs, dtype=np.int64)

    def reset(self):
        """Reset every environment and return the initial observations."""
        self.game_state.reset()
        self.prev_scores.fill(0)
        self.episode_returns.fill(0.0)
        self.episode_lengths.fill(0)
        return self.game_state.get_observations()

    def step_async(self, actions):
        # Stash actions; the actual work happens in step_wait().
        self.actions = actions

    def step_wait(self):
        """
        Advance all envs one step.

        Reward = score delta - 0.001 per-step time penalty, +5.0 bonus on the
        step that reaches the 10-point goal. Done envs are auto-reset; their
        final observation is preserved in infos["terminal_observation"].

        :return: (obs, rewards, dones, infos) in the SB3 VecEnv convention.
        """
        actions_int32 = self.actions.astype(np.int32)
        self.game_state.step(actions_int32)
        obs = self.game_state.get_observations()

        # Delta-based reward (same shape of logic as the modern adapter so
        # metric comparisons stay fair), minus a small time penalty.
        current_scores = self.game_state.batch_scores
        delta_scores = current_scores - self.prev_scores
        rewards = delta_scores.astype(np.float32)
        rewards -= 0.001

        dones = current_scores >= 10
        # Bonus only on the step that actually scored the winning points.
        win_mask = dones & (delta_scores > 0)
        rewards[win_mask] += 5.0

        self.prev_scores = current_scores.copy()
        self.episode_returns += rewards
        self.episode_lengths += 1

        if np.any(dones):
            reset_indices = np.where(dones)[0]
            # BUGFIX: capture terminal observations BEFORE resetting, so
            # "terminal_observation" is the true final state. The original
            # re-fetched obs after the reset and handed out post-reset frames.
            terminal_obs = {int(i): obs[i].copy() for i in reset_indices}
            self.game_state.reset(list(reset_indices))
            obs = self.game_state.get_observations()
            infos = []
            for i in range(self.num_envs):
                if dones[i]:
                    # BUGFIX: report the true episode return/length instead of
                    # the last-step reward and a hard-coded length of 10.
                    infos.append({
                        "terminal_observation": terminal_obs[i],
                        "episode": {
                            "r": float(self.episode_returns[i]),
                            "l": int(self.episode_lengths[i]),
                        },
                    })
                else:
                    infos.append({})
            self.prev_scores[reset_indices] = 0
            self.episode_returns[reset_indices] = 0.0
            self.episode_lengths[reset_indices] = 0
        else:
            infos = [{} for _ in range(self.num_envs)]

        return obs, rewards, dones, infos

    def close(self):
        """Nothing to release: the legacy state is pure in-process arrays."""
        pass

    def get_attr(self, attr_name, indices=None):
        """Not supported by the vectorized legacy backend."""
        return []

    def set_attr(self, attr_name, value, indices=None):
        """Not supported by the vectorized legacy backend."""
        pass

    def env_method(self, method_name, *method_args, **method_kwargs):
        """Not supported by the vectorized legacy backend."""
        return []

    def env_is_wrapped(self, wrapper_class, indices=None):
        """No per-env wrappers exist in the vectorized backend."""
        return [False] * self.num_envs

    def action_masks(self):
        """Legacy env has no action masking; every action is allowed.

        BUGFIX: size the mask from the actual action space instead of a
        hard-coded 1000, so custom action spaces get a matching mask.
        """
        n_actions = getattr(self.action_space, "n", 1000)
        return np.ones((self.num_envs, int(n_actions)), dtype=bool)