import gymnasium as gym
from gymnasium import spaces
import numpy as np
from stable_baselines3 import PPO


# -------------------------
# Side-Scrolling PixelCopter Environment (Medium/Certification Friendly)
# -------------------------
class PixelCopterCertEnv(gym.Env):
    """Side-scrolling copter that must stay inside a drifting vertical gap.

    The world is a row of ``screen_width`` wall columns. Each column has a
    gap that starts at ``wall_gap_positions[i]`` and is ``gap_size`` tall.
    On every step the columns shift one position to the left, a new column
    is appended on the right (its gap drifts by -1, 0 or +1 from the previous
    rightmost gap), and the copter is tested against column 0.

    Actions (``Discrete(2)``):
        0 -- do nothing (only gravity is applied)
        1 -- apply lift, then gravity

    Observation (``Box`` of shape ``(screen_width + 1,)``, float32):
        index 0 is the copter's y position, the remaining entries are the gap
        start of every wall column from left to right.

    Rewards:
        +1 for every step survived, -5 on the step the copter leaves the gap.

    Episode end:
        ``terminated`` on collision; ``truncated`` when ``max_timesteps``
        steps elapse without a collision.

    Args:
        screen_width: Number of wall columns; must be at least 1.
        screen_height: Height of the play field.
        gap_size: Height of the gap in every wall column.

    Raises:
        ValueError: If ``screen_width`` is smaller than 1, or if
            ``screen_height - gap_size - 1`` is not greater than 1 (no valid
            gap start could be sampled).
    """

    def __init__(self, screen_width=50, screen_height=10, gap_size=6):
        super().__init__()

        if screen_width < 1:
            raise ValueError(
                f"screen_width must be >= 1, got {screen_width}"
            )
        if screen_height - gap_size - 1 <= 1:
            raise ValueError(
                "screen_height - gap_size - 1 must be greater than 1, got "
                f"screen_height={screen_height}, gap_size={gap_size}"
            )

        self.screen_width = screen_width
        self.screen_height = screen_height
        self.copter_y = self.screen_height // 2
        self.copter_velocity = 0
        self.gravity = 0.25
        self.lift = -0.9
        self.done = False
        self.timestep = 0
        self.max_timesteps = 500
        self.gap_size = gap_size
        self.wall_gap_positions = self._sample_initial_gaps()

        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(
            low=0,
            high=self.screen_height,
            shape=(self.screen_width + 1,),
            dtype=np.float32,
        )

    def _max_gap_top(self):
        """Return the largest gap start a newly scrolled-in column may take."""
        return self.screen_height - self.gap_size - 1

    def _sample_initial_gaps(self):
        """Draw one gap start per column from the environment's own RNG.

        The upper bound is exclusive, so initial gap starts lie in
        ``[1, screen_height - gap_size - 2]``.
        """
        gaps = self.np_random.integers(
            1, self._max_gap_top(), size=self.screen_width
        )
        # Plain Python ints keep the list homogeneous with values added later.
        return gaps.tolist()

    def _get_obs(self):
        """Build the observation: copter y followed by every gap start."""
        return np.array(
            [self.copter_y] + self.wall_gap_positions, dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: Optional seed for the environment's random generator.
            options: Unused; accepted for API compatibility.
        """
        # Seeds self.np_random so that the episode is reproducible.
        super().reset(seed=seed)

        self.copter_y = self.screen_height // 2
        self.copter_velocity = 0
        self.done = False
        self.timestep = 0
        self.wall_gap_positions = self._sample_initial_gaps()

        return self._get_obs(), {}

    def step(self, action):
        """Advance the simulation by one tick.

        Args:
            action: 1 applies lift before gravity; any other value applies
                gravity only.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
        """
        # Apply action
        if action == 1:
            self.copter_velocity += self.lift
        self.copter_velocity += self.gravity
        self.copter_y = float(
            np.clip(
                self.copter_y + self.copter_velocity, 0, self.screen_height
            )
        )

        # Move walls left. The rightmost gap is read before the shift so a
        # single-column world does not index into an empty list.
        last_gap = self.wall_gap_positions[-1]
        drift = int(self.np_random.integers(-1, 2))  # one of -1, 0, +1
        new_gap = int(np.clip(last_gap + drift, 1, self._max_gap_top()))
        self.wall_gap_positions = self.wall_gap_positions[1:] + [new_gap]

        # Check collision with first wall
        gap_top = self.wall_gap_positions[0]
        gap_bottom = gap_top + self.gap_size
        terminated = self.copter_y <= gap_top or self.copter_y >= gap_bottom
        reward = -5 if terminated else 1

        self.timestep += 1
        # Running out of time is a truncation, not a failure of the agent.
        truncated = (not terminated) and self.timestep >= self.max_timesteps
        self.done = terminated or truncated

        return self._get_obs(), reward, terminated, truncated, {}


# -------------------------
# Training
# -------------------------
# NOTE(review): these statements run on import as well as on direct
# execution; consider an ``if __name__ == "__main__":`` guard if this module
# is ever imported for the environment class alone.
env = PixelCopterCertEnv(screen_width=80, screen_height=10, gap_size=6)
model = PPO("MlpPolicy", env, verbose=1)

print("Training started...")
model.learn(total_timesteps=500_000)  # Enough for certification
print("Training finished!")

model.save("ppo_pixelcopter_cert")
print("Model saved as 'ppo_pixelcopter_cert.zip'")