File size: 2,234 Bytes
13bec41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
715111b
13bec41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
715111b
 
 
13bec41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from math import sqrt

import gym
import numpy as np


class CustomRewardWrapper(gym.Wrapper):
    """Replace the wrapped Tetris-style env's native reward with a shaped one.

    The shaped reward combines four terms per step:

    1. a constant survival bonus of ``1``;
    2. a cubic bonus for lines cleared since the previous step;
    3. a bumpiness penalty (column-height differences), scaled by board height;
    4. a max-height term (positive below half the board, negative above) and
       a penalty of ``0.8`` per hole created since the previous step.

    Observation, ``done`` flag and ``info`` are passed through unchanged.

    NOTE(review): attributes such as ``height``, ``width``, ``score``,
    ``cleared_lines``, ``gameover``, ``get_bumpiness_and_height`` and
    ``get_holes`` are resolved on the wrapped env via ``gym.Wrapper``
    attribute delegation — confirm the wrapped env provides all of them.
    """

    def __init__(self, env):
        super().__init__(env)
        # Per-episode trackers so step() can reward *deltas* (newly cleared
        # lines, newly created holes) rather than absolute values. They are
        # zeroed again at game over inside step().
        self.prev_max_height = 0
        self.prev_cleared = 0
        # prev_score and prev_max_height are maintained but not used by the
        # currently active reward terms — kept for experimentation.
        self.prev_score = 0
        self.prev_holes = 0

    def step(self, action):
        """Advance the env one step and substitute the shaped reward.

        :param action: action forwarded verbatim to the wrapped env.
        :return: ``(obs, reward, done, info)`` where ``reward`` is the shaped
            value described on the class; the other elements are untouched.
        """
        obs, reward, done, info = self.env.step(action)
        board = obs["board"]
        heights = obs["heights"]

        # Deliberately discard the env's native reward; start from a
        # constant per-step survival bonus.
        reward = 1

        # Strong (cubic) bonus for lines cleared since the previous step.
        reward += (self.cleared_lines - self.prev_cleared) ** 3

        # Penalty for big height differences between columns (bumpiness),
        # normalized by the board height.
        reward -= self.get_bumpiness_and_height(board)[0] / self.height

        # Max-height term: positive while the tallest column is below half
        # the board height, negative above it.
        reward += (self.height / 2) - np.max(heights) * 0.5

        # Penalty for holes created since the previous step; a negative
        # delta (holes filled) yields a bonus instead.
        holes = self.get_holes(board)
        reward -= (holes - self.prev_holes) * 0.8

        # Snapshot current values so the next step rewards deltas.
        self.prev_max_height = np.max(heights)
        self.prev_cleared = self.cleared_lines
        self.prev_score = self.score
        self.prev_holes = holes

        # Episode over: reset the trackers so the next episode's first
        # deltas start from a clean slate.
        if self.gameover:
            self.prev_max_height = 0
            self.prev_cleared = 0
            self.prev_score = 0
            self.prev_holes = 0

        return obs, reward, done, info