first test run on train_q_rag_log.py
Browse files- log_test_50.txt +20 -0
log_test_50.txt
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
100/80000, reward=0.378, eval_reward=0.567, qf_loss=22.837, step=1313
|
| 2 |
+
200/80000, reward=0.446, eval_reward=0.567, qf_loss=23.204, step=2613
|
| 3 |
+
300/80000, reward=0.465, eval_reward=0.567, qf_loss=24.270, step=3913
|
| 4 |
+
400/80000, reward=0.468, eval_reward=0.567, qf_loss=16.535, step=5213
|
| 5 |
+
500/80000, reward=0.462, eval_reward=0.497, qf_loss=10.573, step=6513
|
| 6 |
+
600/80000, reward=0.438, eval_reward=0.497, qf_loss=7.781, step=7813
|
| 7 |
+
700/80000, reward=0.430, eval_reward=0.497, qf_loss=5.375, step=9113
|
| 8 |
+
800/80000, reward=0.398, eval_reward=0.497, qf_loss=2.749, step=10413
|
| 9 |
+
900/80000, reward=0.214, eval_reward=0.357, qf_loss=0.695, step=11713
|
| 10 |
+
1000/80000, reward=0.193, eval_reward=0.357, qf_loss=0.468, step=13013
|
| 11 |
+
1100/80000, reward=0.188, eval_reward=0.357, qf_loss=0.201, step=14313
|
| 12 |
+
1200/80000, reward=0.178, eval_reward=0.357, qf_loss=0.181, step=15613
|
| 13 |
+
1300/80000, reward=0.140, eval_reward=0.190, qf_loss=0.077, step=16913
|
| 14 |
+
1400/80000, reward=0.147, eval_reward=0.190, qf_loss=0.107, step=18213
|
| 15 |
+
1500/80000, reward=0.155, eval_reward=0.190, qf_loss=0.096, step=19513
|
| 16 |
+
1600/80000, reward=0.165, eval_reward=0.190, qf_loss=0.093, step=20813
|
| 17 |
+
1700/80000, reward=0.178, eval_reward=0.273, qf_loss=0.145, step=22113
|
| 18 |
+
1800/80000, reward=0.222, eval_reward=0.273, qf_loss=0.092, step=23413
|
| 19 |
+
1900/80000, reward=0.223, eval_reward=0.273, qf_loss=0.080, step=24713
|
| 20 |
+
2000/80000, reward=0.245, eval_reward=0.273, qf_loss=0.081, step=26013
|