| import numpy as np |
|
|
def compute_gae(next_value, rewards, masks, values, gamma, lam):
    """Compute Generalized Advantage Estimation (GAE) advantages and returns.

    The trajectory is walked backwards, accumulating::

        delta_t = rewards[t] + gamma * values[t + 1] * masks[t] - values[t]
        gae_t   = delta_t + gamma * lam * masks[t] * gae_{t+1}

    with ``values[T]`` taken from ``next_value`` and ``gae_T = 0``.

    Args:
        next_value: Value estimate used as the bootstrap entry following the
            last element of ``values``.
        rewards: Per-step rewards; its length ``T`` is the number of steps.
        masks: Per-step multipliers applied to both the bootstrapped next
            value and the carried GAE term (a 0 cuts both off at that step).
            Presumably ``1 - done``; confirm against the caller.
        values: Per-step value estimates, one per reward. Any iterable with
            ``T`` items is accepted (list, tuple, 1-D array); it is copied,
            never modified.
        gamma: Discount factor.
        lam: GAE lambda (weight of the carried advantage term).

    Returns:
        A ``(returns, advantages)`` tuple. ``returns`` is a list of length
        ``T`` holding ``gae_t + values[t]``; ``advantages`` is a NumPy array
        equal to ``returns`` minus ``values``.

    Raises:
        ValueError: If ``values`` and ``rewards`` differ in length.
    """
    # Copy into a list so tuples and arrays work too: ``array + [x]`` would
    # broadcast-add instead of appending, and ``tuple + [x]`` raises.
    values = list(values)
    n_steps = len(rewards)
    if len(values) != n_steps:
        # Without this check a mismatch either raises an opaque IndexError or
        # silently broadcasts in the final subtraction.
        raise ValueError(
            f"values has {len(values)} entries but rewards has {n_steps}"
        )
    values.append(next_value)

    gae = 0
    returns = []
    for step in reversed(range(n_steps)):
        delta = rewards[step] + gamma * values[step + 1] * masks[step] - values[step]
        gae = delta + gamma * lam * masks[step] * gae
        # Collect in reverse order; insert(0, ...) here would be O(n^2).
        returns.append(gae + values[step])
    returns.reverse()

    advantages = np.array(returns) - np.array(values[:-1])
    return returns, advantages
|
|
def normalize(x):
    """Standardize ``x`` to zero mean and (approximately) unit standard deviation.

    Intended for advantages, to help training stability. ``x`` must provide
    ``.mean()`` and ``.std()`` and support elementwise arithmetic (e.g. a
    NumPy array).
    """
    centered = x - x.mean()
    # The small epsilon keeps the division finite when x has zero spread.
    spread = x.std() + 1e-8
    return centered / spread