# Table of accumulated rewards: maps each seen state to a dict of
# {action label: accumulated reward}. Rows are created lazily on first use.
Q = {}

# The labels that can be chosen for a state. Their order here is also the
# tie-breaking order used by get_action().
actions = ["important", "spam", "normal"]


def _ensure_state(state):
    """Return the action-value row for *state*, creating it if missing.

    A newly created row holds every label in ``actions`` with value 0.
    """
    if state not in Q:
        Q[state] = {a: 0 for a in actions}
    return Q[state]


def get_action(state) -> str:
    """Return the action with the highest accumulated value for *state*.

    Side effect: an unseen *state* is added to ``Q`` with all values at 0.

    Args:
        state: Key identifying the situation; must be hashable because it
            is used as a dictionary key.

    Returns:
        The label with the largest value in ``Q[state]``. On a tie the
        label that comes first in the row wins (``max`` keeps the first
        maximum it encounters), so a brand-new state yields ``actions[0]``.
    """
    row = _ensure_state(state)
    return max(row, key=row.get)


def update_q(state, action, reward) -> None:
    """Add *reward* to the accumulated value of *action* in *state*.

    The reward is summed directly into the table entry; no learning rate
    or discount factor is applied.

    Args:
        state: Key identifying the situation; must be hashable.
        action: Label whose value is updated; must already be a key of
            the state's row.
        reward: Amount added to the current value (may be negative).

    Raises:
        KeyError: If *action* is not a key of the state's row.
    """
    _ensure_state(state)[action] += reward