"""Visual playback of a trained PPO agent in a side-scrolling PixelCopter game.

The script defines the ``PixelCopterCertEnv`` Gymnasium environment, a pygame
renderer for its observations, and a loop that loads a saved PPO model and
plays a fixed number of episodes on screen.
"""

import time

import gymnasium as gym
import numpy as np
import pygame
from gymnasium import spaces
from stable_baselines3 import PPO


# -------------------------
# Side-Scrolling PixelCopter Environment (Medium/Certification Friendly)
# -------------------------
class PixelCopterCertEnv(gym.Env):
    """Side-scrolling copter environment with one gap per screen column.

    The observation is a float32 vector of length ``screen_width + 1``:
    element 0 is the copter's vertical position and the remaining elements
    are the top edge of the gap in each column, left to right. Action ``1``
    applies lift; any other action lets gravity act alone. Vertical positions
    grow downwards (row 0 is drawn at the top of the window).

    Args:
        screen_width: Number of wall columns kept in the observation.
        screen_height: Height of the play field in rows.
        gap_size: Height of the opening in every wall column.
    """

    def __init__(self, screen_width=50, screen_height=10, gap_size=6):
        super().__init__()
        self.screen_width = screen_width
        self.screen_height = screen_height
        self.copter_y = self.screen_height // 2
        self.copter_velocity = 0
        self.gravity = 0.25  # added to the velocity on every step
        self.lift = -0.9  # added to the velocity when action == 1 (negative = up)
        self.done = False
        self.timestep = 0
        self.max_timesteps = 500
        self.gap_size = gap_size
        self.wall_gap_positions = self._sample_initial_gaps()
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(
            low=0,
            high=self.screen_height,
            shape=(self.screen_width + 1,),
            dtype=np.float32,
        )

    def _sample_initial_gaps(self):
        """Return one random gap-top row per column, drawn from the env RNG."""
        upper = self.screen_height - self.gap_size - 1  # exclusive bound
        gaps = self.np_random.integers(1, upper, size=self.screen_width)
        return [int(gap) for gap in gaps]

    def _get_obs(self):
        """Build the observation: copter row followed by every gap-top row."""
        return np.array(
            [self.copter_y] + self.wall_gap_positions, dtype=np.float32
        )

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Args:
            seed: Optional seed for the environment's random generator.
            options: Unused; accepted for Gymnasium API compatibility.

        Returns:
            A tuple ``(observation, info)`` where ``info`` is an empty dict.
        """
        # Seeds self.np_random so that episodes are reproducible when a seed
        # is supplied; without this call the seed argument had no effect.
        super().reset(seed=seed)
        self.copter_y = self.screen_height // 2
        self.copter_velocity = 0
        self.done = False
        self.timestep = 0
        self.wall_gap_positions = self._sample_initial_gaps()
        return self._get_obs(), {}

    def step(self, action):
        """Advance the simulation by one column.

        Args:
            action: ``1`` to apply lift, anything else to do nothing.

        Returns:
            A tuple ``(observation, reward, terminated, truncated, info)``.
            ``reward`` is ``-5`` on a crash and ``1`` otherwise.
            ``terminated`` is True when the copter is outside the gap of the
            first column; ``truncated`` is True when ``max_timesteps`` is
            reached without a crash.
        """
        if action == 1:
            self.copter_velocity += self.lift
        self.copter_velocity += self.gravity
        self.copter_y += self.copter_velocity
        self.copter_y = float(np.clip(self.copter_y, 0, self.screen_height))

        # Scroll one column to the left. The new rightmost gap drifts by at
        # most one row from the previous rightmost gap. The last gap is read
        # before slicing so a one-column screen does not index an empty list.
        last_gap = self.wall_gap_positions[-1]
        self.wall_gap_positions = self.wall_gap_positions[1:]
        new_gap = last_gap + int(self.np_random.integers(-1, 2))
        new_gap = int(
            np.clip(new_gap, 1, self.screen_height - self.gap_size - 1)
        )
        self.wall_gap_positions.append(new_gap)

        # Collision is tested against the first (leftmost) column only.
        gap_top = self.wall_gap_positions[0]
        gap_bottom = gap_top + self.gap_size
        terminated = bool(
            self.copter_y <= gap_top or self.copter_y >= gap_bottom
        )
        reward = -5 if terminated else 1

        self.timestep += 1
        # Hitting the step limit is a truncation, not a failure of the agent.
        truncated = not terminated and self.timestep >= self.max_timesteps
        self.done = terminated or truncated

        return self._get_obs(), reward, terminated, truncated, {}


# -------------------------
# Pygame Setup
# -------------------------
pygame.init()
WINDOW_WIDTH = 800
WINDOW_HEIGHT = 400
WIN = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
pygame.display.set_caption("PixelCopter Certification Test")

WHITE = (255, 255, 255)
BLUE = (0, 102, 255)
BLACK = (0, 0, 0)


def draw_screen(obs, env):
    """Render one observation to the pygame window.

    Args:
        obs: Observation vector; ``obs[0]`` is the copter row and ``obs[1:]``
            are the gap-top rows of each column.
        env: Environment providing ``screen_width``, ``screen_height`` and
            ``gap_size`` for scaling.
    """
    WIN.fill(WHITE)
    scale_y = WINDOW_HEIGHT / env.screen_height
    scale_x = WINDOW_WIDTH / env.screen_width
    copter_y = int(obs[0])
    wall_gaps = [int(gap) for gap in obs[1:]]

    # Draw walls: one black bar above and one below the gap of each column.
    for i, gap_top in enumerate(wall_gaps):
        gap_bottom = gap_top + env.gap_size
        pygame.draw.rect(
            WIN, BLACK, pygame.Rect(i * scale_x, 0, scale_x, gap_top * scale_y)
        )
        pygame.draw.rect(
            WIN,
            BLACK,
            pygame.Rect(
                i * scale_x,
                gap_bottom * scale_y,
                scale_x,
                WINDOW_HEIGHT - gap_bottom * scale_y,
            ),
        )

    # Draw copter.
    # NOTE(review): the copter is drawn at a fixed x of 50 px, while the
    # environment tests collisions against column 0 - confirm this is intended.
    pygame.draw.rect(WIN, BLUE, pygame.Rect(50, copter_y * scale_y - 10, 20, 20))
    pygame.display.update()


def _run_episode(env, model):
    """Play one episode on screen.

    Returns:
        A tuple ``(total_reward, quit_requested)``. ``quit_requested`` is True
        when the user closed the window before the episode finished.
    """
    obs, _ = env.reset()
    total_reward = 0
    done = False
    while not done:
        # Leave immediately on window close: stepping or drawing after
        # pygame has been shut down raises pygame.error.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                return total_reward, True

        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = env.step(int(action))
        done = terminated or truncated
        total_reward += reward
        draw_screen(obs, env)
        time.sleep(0.03)
    return total_reward, False


# -------------------------
# Load Model & Test
# -------------------------
env = PixelCopterCertEnv(screen_width=80, screen_height=10, gap_size=6)
model = PPO.load("ppo_pixelcopter_cert")

episodes = 5
for ep in range(episodes):
    total_reward, quit_requested = _run_episode(env, model)
    if quit_requested:
        break
    print(f"Episode {ep+1} reward: {total_reward}")

pygame.quit()