import torch
import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import SubprocVecEnv
from ncps.torch import LTC
from ncps.wirings import AutoNCP

from env.drone_3d import Drone3DEnv

class LTCFeatureExtractor(BaseFeaturesExtractor):
    """
    Custom feature extractor using Liquid Time-Constant (LTC) cells.

    LTC cells model continuous-time dynamics, letting the agent handle
    irregular time steps and stiff dynamics better than standard MLPs or LSTMs.
    """
    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 32):
        super().__init__(observation_space, features_dim)
        input_size = observation_space.shape[0]
        # self.features_dim is already set by super().__init__().

        # Neural Circuit Policy (NCP) wiring gives the LTC structured, sparse
        # connectivity. A small wiring keeps inference fast (< 10 ms).
        # AutoNCP requires units > output_size: here, 48 units for 32 outputs.
        wiring = AutoNCP(48, output_size=features_dim)
        self.ltc = LTC(input_size, wiring, batch_first=True)

        # Hidden state of the LTC, carried across forward calls.
        self.hx = None
    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        # The LTC expects (batch, time, features); SB3 provides (batch, features),
        # so we add a singleton time dimension.
        if observations.dim() == 2:
            observations = observations.unsqueeze(1)

        # (Re)initialize the hidden state when it is missing or the batch size
        # changes (e.g. between rollout collection and minibatch updates).
        batch_size = observations.size(0)
        if self.hx is None or self.hx.size(0) != batch_size:
            self.hx = torch.zeros(
                batch_size, self.ltc.state_size, device=observations.device
            )
        # Caveat: standard PPO assumes a stateless policy, so carrying self.hx
        # between calls is a demo-level simplification: episode resets cannot be
        # detected here, so the hidden state is not aligned with episode
        # boundaries and simply evolves across calls. For production,
        # RecurrentPPO from sb3-contrib manages hidden states properly (see
        # make_recurrent_ppo below).
        # Detach the hidden state from the previous computation graph to avoid
        # "trying to backward through the graph a second time" errors.
        self.hx = self.hx.detach()
        output, self.hx = self.ltc(observations, self.hx)

        # Remove the time dimension.
        return output.squeeze(1)
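
# A minimal alternative sketch, assuming the optional sb3-contrib package is
# installed: RecurrentPPO manages hidden states across rollouts and episode
# boundaries, which the stateless-PPO workaround in LTCFeatureExtractor.forward
# only approximates. "MlpLstmPolicy" is sb3-contrib's built-in LSTM policy, not
# the LTC extractor above; the function name make_recurrent_ppo is ours.
def make_recurrent_ppo(env, verbose=1):
    from sb3_contrib import RecurrentPPO

    return RecurrentPPO("MlpLstmPolicy", env, verbose=verbose, device="auto")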

def make_liquid_ppo(env=None, verbose=1):
    """
    Factory function that creates a PPO agent with a liquid (LTC) feature extractor.

    If no environment is passed, a vectorized Drone3DEnv is built. GPUs such as
    the A100/A10G are data hungry, so the physics runs on several CPU workers
    (SubprocVecEnv) to keep them fed. Pass your own (e.g. DummyVecEnv-backed)
    environment to debug in a single process.
    """
    if env is None:
        n_envs = 4
        env = make_vec_env(
            lambda: Drone3DEnv(render_mode=None, wind_scale=10.0, wind_speed=5.0),
            n_envs=n_envs,
            vec_env_cls=SubprocVecEnv,
        )
    policy_kwargs = dict(
        features_extractor_class=LTCFeatureExtractor,
        features_extractor_kwargs=dict(features_dim=32),
    )

    # PPO hyperparameters: largely the SB3 defaults, with a higher learning
    # rate (1e-3 instead of 3e-4) for faster convergence in this demo.
    model = PPO(
        "MlpPolicy",
        env,
        verbose=verbose,
        learning_rate=1e-3,
        n_steps=2048,
        batch_size=64,
        n_epochs=10,
        gamma=0.99,
        gae_lambda=0.95,
        clip_range=0.2,
        policy_kwargs=policy_kwargs,
        device="cuda" if torch.cuda.is_available() else "cpu",  # GPU if available
    )
    return model
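
# A minimal usage sketch, assuming Drone3DEnv is importable as above; the
# timestep budget and save path are illustrative. The __main__ guard is
# required because SubprocVecEnv spawns worker processes.
if __name__ == "__main__":
    # Quick shape check of the wiring: 48 LTC units projecting to 32 features
    # (the input dimension 12 is a hypothetical observation size).
    demo_ltc = LTC(12, AutoNCP(48, output_size=32), batch_first=True)
    out, _ = demo_ltc(torch.zeros(2, 1, 12))
    assert out.shape == (2, 1, 32)

    model = make_liquid_ppo()
    model.learn(total_timesteps=10_000)
    model.save("ppo_liquid_drone")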