Update symphony_S2/train.py
Browse files
- symphony_S2/train.py +3 -3
symphony_S2/train.py
CHANGED
|
@@ -193,9 +193,9 @@ def sim_loop(env, episodes, testing, Q_learning, algo, total_rewards, total_step
|
|
| 193 |
save(algo, total_rewards, total_steps)
|
| 194 |
print("start testing")
|
| 195 |
log_file.write(str(total_steps) + ",")
|
| 196 |
-
|
| 197 |
print("end of testing")
|
| 198 |
-
log_file.write(str(round(
|
| 199 |
|
| 200 |
|
| 201 |
# if steps is close to episode limit (e.g. 950) we shut down actions and leave noise to get Terminal Transition:
|
|
@@ -230,4 +230,4 @@ Q_learning, total_rewards, total_steps = load(algo, Q_learning)
|
|
| 230 |
if not Q_learning: log_file.clean()
|
| 231 |
|
| 232 |
# Training
|
| 233 |
-
sim_loop(env, num_episodes, False, Q_learning, algo, total_rewards, total_steps)
|
|
|
|
| 193 |
save(algo, total_rewards, total_steps)
|
| 194 |
print("start testing")
|
| 195 |
log_file.write(str(total_steps) + ",")
|
| 196 |
+
test_return = sim_loop(env_test, 25, True, Q_learning, algo, [], total_steps=0)
|
| 197 |
print("end of testing")
|
| 198 |
+
log_file.write(str(round(test_return, 2)) + "\n")
|
| 199 |
|
| 200 |
|
| 201 |
# if steps is close to episode limit (e.g. 950) we shut down actions and leave noise to get Terminal Transition:
|
|
|
|
| 230 |
if not Q_learning: log_file.clean()
|
| 231 |
|
| 232 |
# Training
|
| 233 |
+
sim_loop(env, num_episodes, False, Q_learning, algo, total_rewards, total_steps)
|