File size: 327 Bytes
dc762fd
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
# Labels that can be chosen for a state. The list order is the order in
# which a newly seen state's score table is populated.
actions = [
    "important",
    "spam",
    "normal",
]

# Score table, keyed by state; each value maps an action to its running score.
# Entries are created on first use by get_action() / update_q().
Q = {}

def get_action(state):
    """Return the action with the highest stored score for ``state``.

    If ``state`` has no entry in ``Q`` yet, one is created first with every
    item of ``actions`` scored at 0. When several actions share the top
    score, the one that comes first in the state's table is returned.
    """
    try:
        scores = Q[state]
    except KeyError:
        # First time this state is seen: start every action at zero.
        scores = {name: 0 for name in actions}
        Q[state] = scores
    return max(scores, key=scores.get)

def update_q(state, action, reward):
    """Add ``reward`` to the stored score of ``action`` for ``state``.

    If ``state`` has no entry in ``Q`` yet, one is created first with every
    item of ``actions`` scored at 0. The update is a plain running sum.

    Raises:
        KeyError: if ``action`` is not a key in the state's score table.
    """
    if state in Q:
        table = Q[state]
    else:
        # Unseen state: give it a zeroed score table before accumulating.
        table = {name: 0 for name in actions}
        Q[state] = table
    table[action] += reward