Spaces:

Ivan000
/

game

Runtime error

App Files Community

Ivan000 committed on Dec 12, 2024

Commit

927c930

verified ·

1 Parent(s): 375aee6

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -45

app.py CHANGED Viewed

@@ -64,25 +64,25 @@ class Brick:
         self.rect = pygame.Rect(x, y, BRICK_WIDTH - 5, BRICK_HEIGHT - 5)
 class ArkanoidEnv(gym.Env):
-    def __init__(self, reward_size=1, penalty_size=-1, platform_reward=5, inactivity_penalty=-0.5):
         super(ArkanoidEnv, self).__init__()
         self.action_space = gym.spaces.Discrete(3)  # 0: stay, 1: move left, 2: move right
-        self.observation_space = gym.spaces.Box(low=0, high=SCREEN_WIDTH, shape=(3,), dtype=np.float32)
         self.reward_size = reward_size
         self.penalty_size = penalty_size
         self.platform_reward = platform_reward
-        self.inactivity_penalty = inactivity_penalty
-        self.inactivity_counter = 0
         self.reset()
     def reset(self, seed=None, options=None):
         self.paddle = Paddle()
         self.ball = Ball()
         self.bricks = [Brick(x, y) for y in range(BRICK_HEIGHT, BRICK_HEIGHT * (BRICK_ROWS + 1), BRICK_HEIGHT)
                        for x in range(BRICK_WIDTH, SCREEN_WIDTH - BRICK_WIDTH, BRICK_WIDTH)]
         self.done = False
         self.score = 0
-        self.inactivity_counter = 0
         return self._get_state(), {}
     def step(self, action):
@@ -93,40 +93,46 @@ class ArkanoidEnv(gym.Env):
         elif action == 2:
             self.paddle.move(1)
-        if action == 0:
-            self.inactivity_counter += 1 / FPS
-        else:
-            self.inactivity_counter = 0
-        if self.inactivity_counter >= 1:
-            reward = self.inactivity_penalty
-            return self._get_state(), reward, self.done, False, {}
         self.ball.move()
         if self.ball.rect.colliderect(self.paddle.rect):
             self.ball.velocity[1] = -self.ball.velocity[1]
-            self.ball.velocity[0] += random.uniform(-1, 1)
             self.score += self.platform_reward
         for brick in self.bricks[:]:
             if self.ball.rect.colliderect(brick.rect):
                 self.bricks.remove(brick)
                 self.ball.velocity[1] = -self.ball.velocity[1]
-                self.ball.velocity[0] += random.uniform(-1, 1)
                 self.score += 1
                 if not self.bricks:
                     self.done = True
-                    return self._get_state(), self.reward_size, self.done, False, {}
         if self.ball.rect.bottom >= SCREEN_HEIGHT:
             self.done = True
-            return self._get_state(), self.penalty_size, self.done, False, {}
-        return self._get_state(), 0, self.done, False, {}
     def _get_state(self):
-        return np.array([self.ball.rect.x, self.paddle.rect.x, len(self.bricks)], dtype=np.float32)
     def render(self, mode='rgb_array'):
         surface = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
@@ -145,32 +151,40 @@ class ArkanoidEnv(gym.Env):
     def close(self):
         pygame.quit()
-# Training and playing function
-def train_and_play(reward_size, penalty_size, platform_reward, inactivity_penalty, iterations):
-    env = ArkanoidEnv(reward_size, penalty_size, platform_reward, inactivity_penalty)
-    model = DQN("MlpPolicy", env, verbose=0)
-    model.learn(total_timesteps=iterations)
-    obs, _ = env.reset()
-    frames = []
-    while True:
-        action, _states = model.predict(obs)
-        obs, _, done, _, _ = env.step(action)
-        frame = env.render(mode="rgb_array")
-        frames.append(frame)
-        if done:
-            break
-    env.close()
-    video_path = "/tmp/arkanoid.mp4"
-    out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), FPS, (SCREEN_WIDTH, SCREEN_HEIGHT))
-    for frame in frames:
-        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
-        out.write(frame)
-    out.release()
     return video_path
-# Gradio interface
 def main():
     iface = gr.Interface(
         fn=train_and_play,
@@ -178,10 +192,10 @@ def main():
             gr.Number(label="Reward Size", value=1),
             gr.Number(label="Penalty Size", value=-1),
             gr.Number(label="Platform Reward", value=5),
-            gr.Number(label="Inactivity Penalty", value=-0.5),
             gr.Slider(label="Iterations", minimum=10, maximum=100000, step=10, value=10000)
         ],
-        outputs="video"
     )
     iface.launch()

         self.rect = pygame.Rect(x, y, BRICK_WIDTH - 5, BRICK_HEIGHT - 5)
 class ArkanoidEnv(gym.Env):
+    def __init__(self, reward_size=1, penalty_size=-1, platform_reward=5):
         super(ArkanoidEnv, self).__init__()
         self.action_space = gym.spaces.Discrete(3)  # 0: stay, 1: move left, 2: move right
+        self.observation_space = gym.spaces.Box(low=0, high=SCREEN_WIDTH, shape=(5 + BRICK_ROWS * BRICK_COLS * 2,), dtype=np.float32)
         self.reward_size = reward_size
         self.penalty_size = penalty_size
         self.platform_reward = platform_reward
         self.reset()
     def reset(self, seed=None, options=None):
+        if seed is not None:
+            random.seed(seed)
+            np.random.seed(seed)
         self.paddle = Paddle()
         self.ball = Ball()
         self.bricks = [Brick(x, y) for y in range(BRICK_HEIGHT, BRICK_HEIGHT * (BRICK_ROWS + 1), BRICK_HEIGHT)
                        for x in range(BRICK_WIDTH, SCREEN_WIDTH - BRICK_WIDTH, BRICK_WIDTH)]
         self.done = False
         self.score = 0
         return self._get_state(), {}
     def step(self, action):
         elif action == 2:
             self.paddle.move(1)
         self.ball.move()
         if self.ball.rect.colliderect(self.paddle.rect):
             self.ball.velocity[1] = -self.ball.velocity[1]
             self.score += self.platform_reward
         for brick in self.bricks[:]:
             if self.ball.rect.colliderect(brick.rect):
                 self.bricks.remove(brick)
                 self.ball.velocity[1] = -self.ball.velocity[1]
                 self.score += 1
+                reward = self.reward_size
                 if not self.bricks:
+                    reward += self.reward_size * 10  # Bonus reward for breaking all bricks
                     self.done = True
+                    truncated = False
+                    return self._get_state(), reward, self.done, truncated, {}
         if self.ball.rect.bottom >= SCREEN_HEIGHT:
             self.done = True
+            reward = self.penalty_size
+            truncated = False
+        else:
+            reward = 0
+            truncated = False
+        return self._get_state(), reward, self.done, truncated, {}
     def _get_state(self):
+        state = [
+            self.paddle.rect.x,
+            self.ball.rect.x,
+            self.ball.rect.y,
+            self.ball.velocity[0],
+            self.ball.velocity[1]
+        ]
+        for brick in self.bricks:
+            state.extend([brick.rect.x, brick.rect.y])
+        state.extend([0, 0] * (BRICK_ROWS * BRICK_COLS - len(self.bricks)))  # Padding for missing bricks
+        return np.array(state, dtype=np.float32)
     def render(self, mode='rgb_array'):
         surface = pygame.Surface((SCREEN_WIDTH, SCREEN_HEIGHT))
     def close(self):
         pygame.quit()
+# Training and playing with custom parameters
+def train_and_play(reward_size, penalty_size, platform_reward, iterations):
+    env = ArkanoidEnv(reward_size=reward_size, penalty_size=penalty_size, platform_reward=platform_reward)
+    model = DQN('MlpPolicy', env, verbose=1)
+    timesteps_per_update = min(1000, iterations)
+    video_frames = []
+    completed_iterations = 0
+    while completed_iterations < iterations:
+        steps = min(timesteps_per_update, iterations - completed_iterations)
+        model.learn(total_timesteps=steps)
+        completed_iterations += steps
+        obs, _ = env.reset()
+        done = False
+        while not done:
+            action, _states = model.predict(obs, deterministic=True)
+            obs, reward, done, truncated, _ = env.step(action)
+            frame = env.render(mode='rgb_array')
+            frame = np.rot90(frame)
+            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+            video_frames.append(frame)
+    video_path = "arkanoid_training.mp4"
+    video_writer = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), FPS, (SCREEN_WIDTH, SCREEN_HEIGHT))
+    for frame in video_frames:
+        video_writer.write(frame)
+    video_writer.release()
+    env.close()
     return video_path
+# Main function with Gradio interface
 def main():
     iface = gr.Interface(
         fn=train_and_play,
             gr.Number(label="Reward Size", value=1),
             gr.Number(label="Penalty Size", value=-1),
             gr.Number(label="Platform Reward", value=5),
             gr.Slider(label="Iterations", minimum=10, maximum=100000, step=10, value=10000)
         ],
+        outputs="video",
+        live=False  # Disable auto-generation on slider changes
     )
     iface.launch()