from collections import deque

import numpy as np

# Demo script: compute the discounted return of every time step of a short
# episode by walking the rewards backwards, printing a trace at each step.

gamma = 0.95  # discount factor applied to the return of the following step
rewards = [1, 1, 1, 1, 1]
n_steps = len(rewards)

# One slot per time step. A bounded deque discards items from the opposite
# end once it is full, so a fixed bound smaller than the episode length would
# silently drop the returns of the latest steps when using appendleft().
returns = deque(maxlen=n_steps)

# Iterate from the last step to the first so that returns[0] always holds the
# return of step t + 1 when step t is processed.
for t in reversed(range(n_steps)):
    print("Step=======", t)
    # Nothing follows the final step, so its future return is 0.
    disc_return_t = returns[0] if returns else 0
    print("return", disc_return_t)
    print("reward", rewards[t])
    returns.appendleft(gamma * disc_return_t + rewards[t])
    print("appended ret", returns)

# Convert to an ndarray; index i now holds the discounted return of step i.
returns = np.array(returns)
print(returns)