File size: 8,899 Bytes
7db5284
 
 
f23da01
 
 
 
 
7db5284
 
 
 
 
 
 
 
 
 
f23da01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7db5284
f23da01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7db5284
f23da01
 
7db5284
f23da01
 
 
7db5284
f23da01
 
 
 
 
 
 
 
 
7db5284
f23da01
 
 
7db5284
f23da01
 
 
7db5284
f23da01
 
 
7db5284
f23da01
7db5284
f23da01
 
 
 
 
 
 
 
 
 
7db5284
f23da01
7db5284
f23da01
7db5284
f23da01
7db5284
f23da01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7db5284
f23da01
 
 
 
 
 
 
 
02c40e6
 
 
 
7db5284
 
 
f23da01
 
 
7db5284
16103f7
7db5284
f23da01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303dd32
f23da01
6154f9c
303dd32
 
f23da01
7db5284
 
 
 
 
cb486cd
7db5284
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
import gradio as gr
import numpy as np

import numpy as np

# Symbols used to draw the pieces. Each one must be a single Unicode code
# point: Foolsball.render() pads every cell to a fixed width, so a multi-
# character (e.g. mis-decoded) symbol would break the board layout.
# Named escapes keep this file ASCII-safe regardless of how it is stored.
agent = '\N{SOCCER BALL}'    # ⚽  the ball controlled by the player
opponent = '\N{T-SHIRT}'     # 👕  an opposing player; running into one ends the game
goal = '\N{GOAL NET}'        # 🥅  the target cell

# The playing field, one inner list per row. ' ' marks an unoccupied cell.
# Built from the symbols above so the map can never disagree with them.
arena = [[agent, ' ',      opponent, ' '     ],
         [' ',   ' ',      ' ',      opponent],
         [' ',   opponent, ' ',      ' '     ],
         [' ',   ' ',      ' ',      opponent],
         [' ',   opponent, ' ',      goal    ]]



class Foolsball:
    """A small grid-world "football" environment.

    The agent (the ball) occupies one cell of a rectangular map and moves one
    cell at a time north, east, west or south. Moving onto the goal cell or
    onto a cell held by an opponent ends the episode. A cell is addressed
    either by its (row, col) indices or by the integer state
    ``row * n_cols + col``.
    """

    # Index change (row_delta, col_delta) produced by each action.
    _ACTION_TO_DELTA = {'n': (-1, 0), 'e': (0, +1), 'w': (0, -1), 's': (+1, 0)}

    def __to_state__(self, row, col):
        """Convert from indices (row,col) to integer position."""
        return row * self.n_cols + col

    def __to_indices__(self, state):
        """Convert from integer position to indices (row,col)."""
        return divmod(state, self.n_cols)

    def __deserialize__(self, map: list, agent: str, opponent: str, goal: str):
        """Convert a string representation of a map into a 2D numpy array.

        Param map: list of lists of strings representing the player, opponents and goal.
        Param agent: string representing the agent on the map
        Param opponent: string representing every instance of an opponent player
        Param goal: string representing the location of the goal on the map

        Returns the integer state of the agent's starting cell.
        Raises AssertionError if the map has no agent, no goal or no opponent.
        """
        ## Capture dimensions and map.
        self.n_rows = len(map)
        self.n_cols = len(map[0])
        self.n_states = self.n_rows * self.n_cols
        # np.array always copies, so clearing the agent's cell below never
        # modifies the caller's map (np.asarray would alias an ndarray input).
        self.map = np.array(map)

        ## Store string representations for printing the map, etc.
        self.agent_repr = agent
        self.opponent_repr = opponent
        self.goal_repr = goal

        ## Find initial state, the desired goal state and the state of the opponents.
        self.init_state = None
        self.goal_state = None
        self.opponents_states = []

        for row in range(self.n_rows):
            for col in range(self.n_cols):
                cell = map[row][col]
                if cell == agent:
                    # Store the initial state outside the map.
                    # This helps in quickly resetting the game to the initial state and
                    # also simplifies printing the map independent of the agent's state.
                    self.init_state = self.__to_state__(row, col)
                    self.map[row, col] = ' '
                elif cell == opponent:
                    self.opponents_states.append(self.__to_state__(row, col))
                elif cell == goal:
                    self.goal_state = self.__to_state__(row, col)

        # The second operand of `assert` is the failure message; it must be
        # the string itself (print() would return None and lose the message).
        assert self.init_state is not None, \
            f"Map {map} does not specify an agent {agent} location"
        assert self.goal_state is not None, \
            f"Map {map} does not specify a goal {goal} location"
        assert self.opponents_states, \
            f"Map {map} does not specify any opponents {opponent} location"

        return self.init_state

    def __init__(self, map, agent, opponent, goal):
        """Spawn the world, create variables to track state and actions.

        Param map: list of lists of strings describing the playing field.
        Param agent: string marking the agent's starting cell on the map.
        Param opponent: string marking every opponent cell on the map.
        Param goal: string marking the goal cell on the map.
        """
        # We just need to track the location of the agent (the ball)
        # Everything else is static and so a potential algorithm doesn't
        # have to look at it. The variable `done` flags terminal states.
        self.state = self.__deserialize__(map, agent, opponent, goal)
        self.done = False
        self.actions = ['n', 'e', 'w', 's']

        # Set up the rewards
        self.default_rewards = {'unmarked': -1, 'opponent': -5, 'outside': -1, 'goal': +5}
        self.set_rewards(self.default_rewards)

    def set_rewards(self, rewards):
        """Install a new reward table.

        Param rewards: dict providing a reward for each of the keys
            'unmarked', 'opponent', 'outside' and 'goal'.

        Raises AssertionError if a key is missing; the rewards currently in
        use are left untouched in that case.
        """
        if self.state != self.init_state:
            print('Warning: Setting reward while not in initial state! You may want to call reset() first.')

        # Validate the whole table first so an incomplete one is never installed.
        for key in self.default_rewards:
            assert key in rewards, f'Key {key} missing from reward.'
        self.rewards = rewards

    def reset(self):
        """Reset the environment to its initial state and return that state."""
        # There's really just two things we need to reset: the state, which should
        # be reset to the initial state, and the `done` flag which should be
        # cleared to signal that we are not in a terminal state anymore, even if we
        # were earlier.
        self.state = self.init_state
        self.done = False
        return self.state

    def __get_next_state_on_action__(self, state, action):
        """Return next state based on current state and action.

        A move that would leave the map is rejected: the current state is
        returned unchanged. An unknown action raises KeyError.
        """
        row, col = self.__to_indices__(state)
        row_delta, col_delta = self._ACTION_TO_DELTA[action]
        new_row, new_col = row + row_delta, col + col_delta

        ## Return current state if next state is invalid
        if not (0 <= new_row < self.n_rows and 0 <= new_col < self.n_cols):
            return state

        ## Construct state from new row and col and return it.
        return self.__to_state__(new_row, new_col)

    def __get_reward_for_transition__(self, state, next_state):
        """Return the reward based on the transition from current state to next state."""
        ## Transition rejected due to illegal action (move)
        if next_state == state:
            return self.rewards['outside']

        ## Goal!
        if next_state == self.goal_state:
            return self.rewards['goal']

        ## Ran into opponent.
        if next_state in self.opponents_states:
            return self.rewards['opponent']

        ## Made a safe and valid move.
        return self.rewards['unmarked']

    def __is_terminal_state__(self, state):
        """Return True if `state` is the goal cell or an opponent's cell."""
        return (state == self.goal_state) or (state in self.opponents_states)

    def step(self, action):
        """Simulate state transition based on current state and action received.

        Param action: one of the strings in `self.actions`.

        Returns the tuple (next_state, reward, done).
        Raises AssertionError when called after the episode has ended.
        """
        assert not self.done, \
            f'You cannot call step() in a terminal state({self.state}). Check the "done" flag before calling step() to avoid this.'

        next_state = self.__get_next_state_on_action__(self.state, action)
        reward = self.__get_reward_for_transition__(self.state, next_state)
        done = self.__is_terminal_state__(next_state)

        self.state, self.done = next_state, done

        return next_state, reward, done

    def render(self, toconsole=True):
        """Pretty-print the environment and agent.

        Param toconsole: when True, also print the board to stdout.

        Returns a 2D numpy array of padded strings, one per cell.
        """
        ## Create a copy of the map wide enough for the widest symbol plus one
        ## space of padding on each side (3 characters for 1-character
        ## symbols), so that no padded cell is ever truncated.
        symbols = (self.agent_repr, self.opponent_repr, self.goal_repr)
        width = max(3, 2 + max(len(symbol) for symbol in symbols))
        _map = np.array(self.map, dtype=f'<U{width}')

        ## Mark unoccupied positions with special symbol.
        ## And add extra spacing to align all columns.
        for row in range(_map.shape[0]):
            for col in range(_map.shape[1]):
                if _map[row, col] == ' ':
                    _map[row, col] = ' + '
                elif _map[row, col] == self.opponent_repr:
                    _map[row, col] = self.opponent_repr + ' '
                elif _map[row, col] == self.goal_repr:
                    _map[row, col] = ' ' + self.goal_repr + ' '

        ## If current state overlaps with the goal state or one of the opponents'
        ## states, substitute a distinct marker.
        r, c = self.__to_indices__(self.state)
        if self.state == self.goal_state:
            _map[r, c] = ' 🏁 '
        elif self.state in self.opponents_states:
            _map[r, c] = ' ❗ '
        else:
            _map[r, c] = ' ' + self.agent_repr

        if toconsole:
            for cells in _map:
                # Each board row is followed by an empty line.
                print(''.join(f' {cell} ' for cell in cells) + '\n')
            print()

        return _map


# The single module-level game instance that play() reads and updates.
foolsball = Foolsball(arena, agent, opponent, goal)
# Put the agent on its initial cell and clear the `done` flag.
foolsball.reset()


def play(key):
    """Apply one UI command to the shared game and return the board as HTML.

    Param key: one of "Up", "Down", "Left", "Right" or "Reset".

    A move is applied only while the game is still running; once the game is
    over, moves are ignored and the final board is shown until "Reset" is
    received. Any other key yields an "Invalid key" page.

    Returns an HTML document string with one paragraph per board row,
    preceded by a "Game over!!!" paragraph when the game has ended.
    """
    import html  # local import: only used to escape text placed into the page

    key_to_action = {"Up": 'n', "Down": 's', "Left": 'w', "Right": 'e', "Reset": 'r'}
    if key not in key_to_action:
        # `key` comes from the web UI, so escape it before echoing it back.
        return f"<HTML> <body> Invalid key {html.escape(str(key))} </body> </HTML>"

    act = key_to_action[key]
    if act == 'r':
        foolsball.reset()
    elif act in foolsball.actions and not foolsball.__is_terminal_state__(foolsball.state):
        foolsball.step(act)

    # Always render, including when a move was ignored because the game is
    # over; otherwise there would be no board to build the page from.
    board = foolsball.render(False)

    parts = []
    if foolsball.__is_terminal_state__(foolsball.state):
        parts.append("<p>Game over!!!</p>")

    for cells in board:
        line = "".join(f'&nbsp;{html.escape(str(cell))}&nbsp;' for cell in cells)
        parts.append(f"<p>{line}</p>")
    parts.append("<p></p>")

    body = "".join(parts)
    return f"<HTML> <body> {body} </body> </HTML>"




# "Reset" is offered next to the four moves: play() already handles it, and
# it is the only way to start over once the game has ended.
demo = gr.Interface(fn=play,
                    inputs=gr.Radio(["Up", "Down", "Left", "Right", "Reset"]),
                    outputs="html",
                    live=True)
demo.launch()