File size: 4,385 Bytes
f9f2735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import pygame
import time
import numpy as np
from stable_baselines3 import PPO
from gymnasium import spaces
import gymnasium as gym

# -------------------------
# Side-Scrolling PixelCopter Environment (Medium/Certification Friendly)
# -------------------------
class PixelCopterCertEnv(gym.Env):
    """Side-scrolling copter environment on a small grid.

    The world is ``screen_width`` columns wide and ``screen_height`` rows
    tall. Every column has an opening that starts at row
    ``wall_gap_positions[i]`` and is ``gap_size`` rows high. On each step the
    columns shift left by one, a new column is appended on the right, and the
    copter is checked against the opening of the left-most column.

    Actions (``Discrete(2)``): ``1`` applies ``lift`` to the vertical
    velocity, any other value only lets ``gravity`` act.

    Observation: a ``float32`` vector of length ``screen_width + 1`` holding
    the copter's y position followed by the top row of every column's gap.

    Reward: ``+1`` for each step spent inside the gap, ``-5`` on the step in
    which the copter leaves it (which also ends the episode).
    """

    def __init__(self, screen_width=50, screen_height=10, gap_size=6):
        """Create the environment.

        Args:
            screen_width: Number of wall columns in the observation.
            screen_height: Number of rows; also the upper bound for y values.
            gap_size: Height of the opening in each column, in rows.

        Raises:
            ValueError: If ``gap_size`` leaves no valid row for the top of a
                gap (requires ``screen_height - gap_size - 1 > 1``).
        """
        super().__init__()
        # Fail early with a readable message instead of the NumPy
        # "low >= high" error the random draw would otherwise raise.
        if screen_height - gap_size - 1 <= 1:
            raise ValueError(
                "gap_size is too large for screen_height: need "
                "screen_height - gap_size - 1 > 1, got "
                f"screen_height={screen_height}, gap_size={gap_size}"
            )
        self.screen_width = screen_width
        self.screen_height = screen_height
        self.copter_y = self.screen_height // 2
        self.copter_velocity = 0
        self.gravity = 0.25
        self.lift = -0.9
        self.done = False
        self.timestep = 0
        self.max_timesteps = 500
        self.gap_size = gap_size
        self.wall_gap_positions = self._random_gaps()
        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(
            low=0, high=self.screen_height, shape=(self.screen_width + 1,), dtype=np.float32
        )

    def _random_gaps(self):
        """Return one independently drawn gap-top row per column."""
        # Same range as the original np.random.randint call: [1, high).
        high = self.screen_height - self.gap_size - 1
        draws = self.np_random.integers(1, high, size=self.screen_width)
        return [int(row) for row in draws]

    def _get_obs(self):
        """Pack the copter position and the gap tops into one float32 vector."""
        return np.array([self.copter_y] + self.wall_gap_positions, dtype=np.float32)

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: Optional seed; forwarded to ``gym.Env.reset`` so that
                ``self.np_random`` (used for every random draw in this class)
                is reseeded and episodes become reproducible.
            options: Accepted for API compatibility; not used.
        """
        # Seeds self.np_random when a seed is given.
        super().reset(seed=seed)
        self.copter_y = self.screen_height // 2
        self.copter_velocity = 0
        self.done = False
        self.timestep = 0
        self.wall_gap_positions = self._random_gaps()
        return self._get_obs(), {}

    def step(self, action):
        """Advance the simulation by one tick.

        Args:
            action: ``1`` to apply lift; anything else lets the copter fall.

        Returns:
            ``(obs, reward, terminated, truncated, info)``. ``truncated`` is
            always ``False``.
        """
        # Physics: optional lift, then gravity, then clamp to the screen.
        if action == 1:
            self.copter_velocity += self.lift
        self.copter_velocity += self.gravity
        self.copter_y = float(
            np.clip(self.copter_y + self.copter_velocity, 0, self.screen_height)
        )

        # Scroll: drop the left-most column and append a new one whose gap
        # moves by at most one row relative to its neighbour.
        upper = self.screen_height - self.gap_size - 1
        drift = int(self.np_random.integers(-1, 2))  # one of -1, 0, 1
        next_gap = int(np.clip(self.wall_gap_positions[-1] + drift, 1, upper))
        self.wall_gap_positions = self.wall_gap_positions[1:] + [next_gap]

        # Collision is evaluated against the (new) left-most column only;
        # touching either edge of the gap counts as a crash.
        gap_top = self.wall_gap_positions[0]
        gap_bottom = gap_top + self.gap_size
        if self.copter_y <= gap_top or self.copter_y >= gap_bottom:
            self.done = True
            reward = -5
        else:
            reward = 1

        self.timestep += 1
        if self.timestep >= self.max_timesteps:
            self.done = True

        # NOTE(review): the time limit is reported through `terminated`
        # rather than `truncated`. Kept as-is because callers in this file
        # loop on the third return value only.
        return self._get_obs(), reward, self.done, False, {}

# -------------------------
# Pygame Setup
# -------------------------
# Window size in pixels.
WINDOW_WIDTH, WINDOW_HEIGHT = 800, 400

# RGB colours used by the renderer.
WHITE = (255, 255, 255)
BLUE = (0, 102, 255)
BLACK = (0, 0, 0)

# Initialise pygame and open the titled window that draw_screen paints on.
pygame.init()
WIN = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
pygame.display.set_caption("PixelCopter Certification Test")

def draw_screen(obs, env):
    """Render one observation to the pygame window.

    Args:
        obs: Sequence whose first element is the copter's y position and
            whose remaining elements are the gap-top row of each column.
            Values are truncated to integers before drawing.
        env: Object providing ``screen_width``, ``screen_height`` and
            ``gap_size``; used to scale grid cells to window pixels.
    """
    WIN.fill(WHITE)

    # Pixels per grid cell along each axis.
    cell_h = WINDOW_HEIGHT / env.screen_height
    cell_w = WINDOW_WIDTH / env.screen_width

    copter_row = int(obs[0])
    gap_tops = [int(value) for value in obs[1:]]

    # Draw walls: for every column, a black bar above the gap and one below.
    for column, top in enumerate(gap_tops):
        left = column * cell_w
        bottom_px = (top + env.gap_size) * cell_h
        pygame.draw.rect(WIN, BLACK, pygame.Rect(left, 0, cell_w, top * cell_h))
        pygame.draw.rect(
            WIN, BLACK, pygame.Rect(left, bottom_px, cell_w, WINDOW_HEIGHT - bottom_px)
        )

    # Draw copter: a 20x20 square at a fixed x of 50 px, vertically centred
    # on its row.
    # NOTE(review): the environment checks collisions against column 0,
    # which is not the column under x=50 -- confirm this offset is intended.
    copter_box = pygame.Rect(50, copter_row * cell_h - 10, 20, 20)
    pygame.draw.rect(WIN, BLUE, copter_box)

    pygame.display.update()

# -------------------------
# Load Model & Test
# -------------------------
# Build the environment with the dimensions passed here and load the saved
# PPO policy from "ppo_pixelcopter_cert".
env = PixelCopterCertEnv(screen_width=80, screen_height=10, gap_size=6)
model = PPO.load("ppo_pixelcopter_cert")

episodes = 5
quit_requested = False
try:
    for ep in range(episodes):
        obs, _ = env.reset()
        done = False
        total_reward = 0
        while not done:
            # If the window was closed, stop *before* stepping or drawing:
            # drawing on a display after pygame.quit() raises pygame.error.
            if any(event.type == pygame.QUIT for event in pygame.event.get()):
                quit_requested = True
                break
            action, _ = model.predict(obs)
            obs, reward, terminated, truncated, info = env.step(action)
            # An episode is over when the env reports either flag.
            done = terminated or truncated
            total_reward += reward
            draw_screen(obs, env)
            time.sleep(0.03)  # slow playback down so it is watchable
        if quit_requested:
            # Abandon the remaining episodes as well.
            break
        print(f"Episode {ep+1} reward: {total_reward}")
finally:
    # Single shutdown point, reached on normal completion, window close,
    # or an exception.
    pygame.quit()