| from math import sqrt | |
| import gym | |
| import numpy as np | |
class CustomRewardWrapper(gym.Wrapper):
    """Reward-shaping wrapper for a Tetris-like environment.

    Replaces the env's native per-step reward with a shaped signal:
    a survival bonus, a (cubic) bonus for lines cleared this step, and
    penalties for bumpiness, stack height, and newly created holes.

    Relies on the wrapped env exposing ``cleared_lines``, ``score``,
    ``gameover``, ``height``, ``width``, ``get_holes(board)`` and
    ``get_bumpiness_and_height(board)`` — these are reached through
    ``gym.Wrapper``'s attribute forwarding.  Observations are expected
    to be dicts with ``"board"`` and ``"heights"`` keys.
    """

    def __init__(self, env):
        super().__init__(env)
        # Per-episode bookkeeping so rewards can be computed as deltas
        # against the previous step.
        self._reset_trackers()

    def _reset_trackers(self):
        """Zero the episode-delta trackers (start of a new episode)."""
        self.prev_max_height = 0
        self.prev_cleared = 0
        self.prev_score = 0
        self.prev_holes = 0

    def reset(self, **kwargs):
        """Reset the wrapped env AND the delta trackers.

        Without this, a reset that is not preceded by ``gameover``
        would leak stale ``prev_*`` values into the next episode's
        first shaped reward.
        """
        self._reset_trackers()
        return self.env.reset(**kwargs)

    def step(self, action):
        """Step the env; return ``(obs, shaped_reward, done, info)``.

        The env's own reward is discarded and replaced by the shaped
        value computed below.
        """
        obs, reward, done, info = self.env.step(action)
        board = obs["board"]
        heights = obs["heights"]

        # Survival bonus: +1 for every step the agent stays alive.
        reward = 1
        # Strong bonus for line clears: cubic in lines cleared this step,
        # so multi-line clears are heavily favored.
        reward += (self.cleared_lines - self.prev_cleared) ** 3
        # Penalty for big height differences between columns (bumpiness),
        # normalized by board height.
        reward -= self.get_bumpiness_and_height(board)[0] / self.height
        # Penalty for tall stacks: positive while the stack stays low.
        # NOTE(review): due to precedence this computes
        # height/2 - 0.5*max(heights), NOT (height/2 - max(heights))*0.5 —
        # confirm the 0.5 factor is meant to scale only max(heights).
        reward += (self.height / 2) - np.max(heights) * 0.5
        # Penalty for newly created holes (rewards removing holes).
        holes = self.get_holes(board)
        reward -= (holes - self.prev_holes) * 0.8

        # Update trackers for the next step's delta computations.
        self.prev_max_height = np.max(heights)
        self.prev_cleared = self.cleared_lines
        self.prev_score = self.score
        self.prev_holes = holes
        if self.gameover:
            self._reset_trackers()
        return obs, reward, done, info