Spaces:
Sleeping
Sleeping
| import numpy as np | |
# Sample episodes fed to the Monte Carlo demo. Each episode is a list of
# steps, and each step is a [state, action, reward] triple (the shape that
# main_r unpacks as `s, a, r`).
episodes = [
    [
        ["A", "a1", 3],
        ["A", "a2", 2],
        ["B", "a1", -4],
        ["A", "a1", 4],
        ["B", "a1", -3],
    ],
    [
        ["B", "a1", -2],
        ["A", "a1", 3],
        ["B", "a2", -3],
    ],
]
# Name -> integer position tables. The "states" table is what main_r uses to
# index its value array; the "actions" table is not read by the code visible
# in this file.
index_map = {
    "states": {name: pos for pos, name in enumerate(("A", "B"))},
    "actions": {name: pos for pos, name in enumerate(("a1", "a2"))},
}
def main_r(episode_data=None, state_index=None):
    """Run first-visit Monte Carlo prediction and print a step-by-step trace.

    Every episode is walked backwards so the (undiscounted) return ``G`` can
    be accumulated one reward at a time. At the earliest time step at which a
    state occurs in an episode, ``G`` is appended to that state's list of
    returns and the state's value estimate is set to the mean of that list.

    Args:
        episode_data: Iterable of episodes; each episode is a sequence of
            ``[state, action, reward]`` steps. Defaults to the module-level
            ``episodes``.
        state_index: Mapping from state name to its position in the value
            array. Defaults to ``index_map["states"]``.

    Returns:
        NumPy array of value estimates, positioned according to
        ``state_index``. States that were never visited stay at 0.

    Raises:
        KeyError: If an episode contains a state missing from ``state_index``.
    """
    # Fall back to the module-level demo data only when nothing was passed in.
    if episode_data is None:
        episode_data = episodes
    if state_index is None:
        state_index = index_map["states"]

    print("# MonteCarloAgent.py")
    v = np.zeros(len(state_index))
    rets = {s: [] for s in state_index}

    for ep in episode_data:
        print("=" * 80)
        g = 0
        ep_len = len(ep)
        print(f"# Episode: {ep} (steps: {ep_len}) G: {g}")

        # Earliest time step of each state, computed once per episode so the
        # first-visit check below is a dictionary lookup instead of a rescan
        # of the episode prefix at every step.
        first_visit = {}
        for t, step in enumerate(ep):
            first_visit.setdefault(step[0], t)

        for t in range(ep_len - 1, -1, -1):
            s, a, r = ep[t]
            si = state_index[s]
            g = g + r
            print(f"# Step {t + 1}:")
            print(f"\ts: {s}, a: {a}, r: {r}")
            print(f"\tG: {g}")
            # Update only at the first visit, i.e. unless the state already
            # appears in the episode before time t.
            if first_visit[s] == t:
                rets[s].append(g)
                # First-visit MC: the estimate is the plain average of the
                # observed returns. A step size (alpha) belongs only to the
                # incremental form V <- V + alpha * (G - V), not to this one.
                v[si] = sum(rets[s]) / len(rets[s])
                print(f"\tV[{s}] = {v[si]}")

    return v
# Script entry point: run the demo only when this file is executed directly,
# so importing the module does not trigger the printed trace.
if __name__ == "__main__":
    main_r()