# TetrisAI / reward_wrapper.py
# Origin: marci0929's model repository, commit 715111b ("updated model")
from math import sqrt
import gym
import numpy as np
class CustomRewardWrapper(gym.Wrapper):
    """Reward-shaping wrapper for a Tetris environment.

    Discards the wrapped env's native reward and rebuilds a shaped signal
    that rewards survival and line clears while penalizing column bumpiness,
    stack height, and newly created holes.

    NOTE(review): relies on attributes/methods reached through gym.Wrapper
    attribute delegation to the wrapped env (``height``, ``width``, ``score``,
    ``cleared_lines``, ``gameover``, ``get_bumpiness_and_height``,
    ``get_holes``) -- confirm the base env exposes all of these.
    """

    def __init__(self, env):
        """Wrap *env* and initialize the per-episode delta trackers."""
        super().__init__(env)
        # Previous-step snapshots used to compute per-step deltas.
        self.prev_max_height = 0
        self.prev_cleared = 0
        self.prev_score = 0
        self.prev_holes = 0

    def step(self, action):
        """Step the wrapped env and return ``(obs, shaped_reward, done, info)``.

        The shaped reward is the sum of:
          +1                            survival bonus per step
          +(new lines cleared) ** 3     cubed, so multi-line clears dominate
          -bumpiness / height           penalty for uneven column heights
          +(height/2) - max_height*0.5  penalty that grows as the stack rises
          -(new holes) * 0.8            penalty for holes created this step
        """
        # The env's own reward is intentionally discarded; we rebuild it below.
        obs, _, done, info = self.env.step(action)
        board = obs["board"]
        heights = obs["heights"]

        reward = 1  # survival bonus

        # Cubed bonus for lines cleared since the previous step.
        reward += (self.cleared_lines - self.prev_cleared) ** 3

        # Penalty for big differences between columns (bumpiness).
        reward -= self.get_bumpiness_and_height(board)[0] / self.height

        # Penalty for high columns.
        # NOTE(review): precedence yields (height/2) - (max_height * 0.5);
        # if the intent was ((height/2) - max_height) * 0.5 this needs
        # parentheses -- confirm before changing, behavior kept as-is.
        reward += (self.height / 2) - np.max(heights) * 0.5

        # Penalty for holes created since the previous step.
        holes = self.get_holes(board)
        reward -= (holes - self.prev_holes) * 0.8

        # Snapshot current state for the next step's deltas.
        self.prev_max_height = np.max(heights)
        self.prev_cleared = self.cleared_lines
        self.prev_score = self.score
        self.prev_holes = holes

        # Reset the trackers at episode end so the next episode starts clean.
        if self.gameover:
            self.prev_max_height = 0
            self.prev_cleared = 0
            self.prev_score = 0
            self.prev_holes = 0

        return obs, reward, done, info