End of training
Browse files- README.md +2 -2
- all_results.json +6 -6
- eval_results.json +3 -3
- train_results.json +3 -3
- trainer_state.json +603 -603
README.md
CHANGED
|
@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 16 |
|
| 17 |
# train_stsb_1745333591
|
| 18 |
|
| 19 |
-
This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on
|
| 20 |
It achieves the following results on the evaluation set:
|
| 21 |
-
- Loss:
|
| 22 |
- Num Input Tokens Seen: 54490336
|
| 23 |
|
| 24 |
## Model description
|
|
|
|
| 16 |
|
| 17 |
# train_stsb_1745333591
|
| 18 |
|
| 19 |
+
This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on the stsb dataset.
|
| 20 |
It achieves the following results on the evaluation set:
|
| 21 |
+
- Loss: 0.5494
|
| 22 |
- Num Input Tokens Seen: 54490336
|
| 23 |
|
| 24 |
## Model description
|
all_results.json
CHANGED
|
@@ -1,13 +1,13 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 123.45749613601237,
|
| 3 |
"eval_loss": 0.5493518114089966,
|
| 4 |
-
"eval_runtime": 11.
|
| 5 |
-
"eval_samples_per_second":
|
| 6 |
-
"eval_steps_per_second": 12.
|
| 7 |
"num_input_tokens_seen": 54490336,
|
| 8 |
"total_flos": 2.453675202191819e+18,
|
| 9 |
"train_loss": 0.10362623064493919,
|
| 10 |
-
"train_runtime":
|
| 11 |
-
"train_samples_per_second": 21.
|
| 12 |
-
"train_steps_per_second": 1.
|
| 13 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 123.45749613601237,
|
| 3 |
"eval_loss": 0.5493518114089966,
|
| 4 |
+
"eval_runtime": 11.3055,
|
| 5 |
+
"eval_samples_per_second": 50.86,
|
| 6 |
+
"eval_steps_per_second": 12.737,
|
| 7 |
"num_input_tokens_seen": 54490336,
|
| 8 |
"total_flos": 2.453675202191819e+18,
|
| 9 |
"train_loss": 0.10362623064493919,
|
| 10 |
+
"train_runtime": 29204.064,
|
| 11 |
+
"train_samples_per_second": 21.915,
|
| 12 |
+
"train_steps_per_second": 1.37
|
| 13 |
}
|
eval_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 123.45749613601237,
|
| 3 |
"eval_loss": 0.5493518114089966,
|
| 4 |
-
"eval_runtime": 11.
|
| 5 |
-
"eval_samples_per_second":
|
| 6 |
-
"eval_steps_per_second": 12.
|
| 7 |
"num_input_tokens_seen": 54490336
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 123.45749613601237,
|
| 3 |
"eval_loss": 0.5493518114089966,
|
| 4 |
+
"eval_runtime": 11.3055,
|
| 5 |
+
"eval_samples_per_second": 50.86,
|
| 6 |
+
"eval_steps_per_second": 12.737,
|
| 7 |
"num_input_tokens_seen": 54490336
|
| 8 |
}
|
train_results.json
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
"num_input_tokens_seen": 54490336,
|
| 4 |
"total_flos": 2.453675202191819e+18,
|
| 5 |
"train_loss": 0.10362623064493919,
|
| 6 |
-
"train_runtime":
|
| 7 |
-
"train_samples_per_second": 21.
|
| 8 |
-
"train_steps_per_second": 1.
|
| 9 |
}
|
|
|
|
| 3 |
"num_input_tokens_seen": 54490336,
|
| 4 |
"total_flos": 2.453675202191819e+18,
|
| 5 |
"train_loss": 0.10362623064493919,
|
| 6 |
+
"train_runtime": 29204.064,
|
| 7 |
+
"train_samples_per_second": 21.915,
|
| 8 |
+
"train_steps_per_second": 1.37
|
| 9 |
}
|
trainer_state.json
CHANGED
|
@@ -332,9 +332,9 @@
|
|
| 332 |
{
|
| 333 |
"epoch": 0.6182380216383307,
|
| 334 |
"eval_loss": 0.9577658176422119,
|
| 335 |
-
"eval_runtime": 11.
|
| 336 |
-
"eval_samples_per_second": 51.
|
| 337 |
-
"eval_steps_per_second": 12.
|
| 338 |
"num_input_tokens_seen": 272576,
|
| 339 |
"step": 200
|
| 340 |
},
|
|
@@ -661,9 +661,9 @@
|
|
| 661 |
{
|
| 662 |
"epoch": 1.2349304482225656,
|
| 663 |
"eval_loss": 0.7184381484985352,
|
| 664 |
-
"eval_runtime": 11.
|
| 665 |
-
"eval_samples_per_second":
|
| 666 |
-
"eval_steps_per_second": 12.
|
| 667 |
"num_input_tokens_seen": 544096,
|
| 668 |
"step": 400
|
| 669 |
},
|
|
@@ -990,9 +990,9 @@
|
|
| 990 |
{
|
| 991 |
"epoch": 1.8531684698608966,
|
| 992 |
"eval_loss": 0.6815493106842041,
|
| 993 |
-
"eval_runtime": 11.
|
| 994 |
-
"eval_samples_per_second":
|
| 995 |
-
"eval_steps_per_second": 12.
|
| 996 |
"num_input_tokens_seen": 818048,
|
| 997 |
"step": 600
|
| 998 |
},
|
|
@@ -1319,9 +1319,9 @@
|
|
| 1319 |
{
|
| 1320 |
"epoch": 2.469860896445131,
|
| 1321 |
"eval_loss": 0.6753404140472412,
|
| 1322 |
-
"eval_runtime": 11.
|
| 1323 |
-
"eval_samples_per_second":
|
| 1324 |
-
"eval_steps_per_second": 12.
|
| 1325 |
"num_input_tokens_seen": 1089600,
|
| 1326 |
"step": 800
|
| 1327 |
},
|
|
@@ -1648,9 +1648,9 @@
|
|
| 1648 |
{
|
| 1649 |
"epoch": 3.0865533230293662,
|
| 1650 |
"eval_loss": 0.6587666273117065,
|
| 1651 |
-
"eval_runtime": 11.
|
| 1652 |
-
"eval_samples_per_second":
|
| 1653 |
-
"eval_steps_per_second": 12.
|
| 1654 |
"num_input_tokens_seen": 1361504,
|
| 1655 |
"step": 1000
|
| 1656 |
},
|
|
@@ -1977,9 +1977,9 @@
|
|
| 1977 |
{
|
| 1978 |
"epoch": 3.704791344667697,
|
| 1979 |
"eval_loss": 0.6507958173751831,
|
| 1980 |
-
"eval_runtime": 11.
|
| 1981 |
-
"eval_samples_per_second":
|
| 1982 |
-
"eval_steps_per_second": 12.
|
| 1983 |
"num_input_tokens_seen": 1636960,
|
| 1984 |
"step": 1200
|
| 1985 |
},
|
|
@@ -2306,9 +2306,9 @@
|
|
| 2306 |
{
|
| 2307 |
"epoch": 4.321483771251932,
|
| 2308 |
"eval_loss": 0.6580312252044678,
|
| 2309 |
-
"eval_runtime": 11.
|
| 2310 |
-
"eval_samples_per_second":
|
| 2311 |
-
"eval_steps_per_second": 12.
|
| 2312 |
"num_input_tokens_seen": 1909696,
|
| 2313 |
"step": 1400
|
| 2314 |
},
|
|
@@ -2635,9 +2635,9 @@
|
|
| 2635 |
{
|
| 2636 |
"epoch": 4.939721792890262,
|
| 2637 |
"eval_loss": 0.6381492614746094,
|
| 2638 |
-
"eval_runtime": 11.
|
| 2639 |
-
"eval_samples_per_second":
|
| 2640 |
-
"eval_steps_per_second": 12.
|
| 2641 |
"num_input_tokens_seen": 2182656,
|
| 2642 |
"step": 1600
|
| 2643 |
},
|
|
@@ -2964,9 +2964,9 @@
|
|
| 2964 |
{
|
| 2965 |
"epoch": 5.556414219474497,
|
| 2966 |
"eval_loss": 0.6330167055130005,
|
| 2967 |
-
"eval_runtime": 11.
|
| 2968 |
-
"eval_samples_per_second": 50.
|
| 2969 |
-
"eval_steps_per_second": 12.
|
| 2970 |
"num_input_tokens_seen": 2453904,
|
| 2971 |
"step": 1800
|
| 2972 |
},
|
|
@@ -3293,9 +3293,9 @@
|
|
| 3293 |
{
|
| 3294 |
"epoch": 6.1731066460587325,
|
| 3295 |
"eval_loss": 0.6232376098632812,
|
| 3296 |
-
"eval_runtime": 11.
|
| 3297 |
-
"eval_samples_per_second": 50.
|
| 3298 |
-
"eval_steps_per_second": 12.
|
| 3299 |
"num_input_tokens_seen": 2727984,
|
| 3300 |
"step": 2000
|
| 3301 |
},
|
|
@@ -3622,9 +3622,9 @@
|
|
| 3622 |
{
|
| 3623 |
"epoch": 6.7913446676970635,
|
| 3624 |
"eval_loss": 0.6167892813682556,
|
| 3625 |
-
"eval_runtime": 11.
|
| 3626 |
-
"eval_samples_per_second": 50.
|
| 3627 |
-
"eval_steps_per_second": 12.
|
| 3628 |
"num_input_tokens_seen": 2999760,
|
| 3629 |
"step": 2200
|
| 3630 |
},
|
|
@@ -3951,9 +3951,9 @@
|
|
| 3951 |
{
|
| 3952 |
"epoch": 7.4080370942812985,
|
| 3953 |
"eval_loss": 0.5621501207351685,
|
| 3954 |
-
"eval_runtime": 11.
|
| 3955 |
-
"eval_samples_per_second":
|
| 3956 |
-
"eval_steps_per_second": 12.
|
| 3957 |
"num_input_tokens_seen": 3274528,
|
| 3958 |
"step": 2400
|
| 3959 |
},
|
|
@@ -4280,9 +4280,9 @@
|
|
| 4280 |
{
|
| 4281 |
"epoch": 8.024729520865533,
|
| 4282 |
"eval_loss": 0.5813793540000916,
|
| 4283 |
-
"eval_runtime": 11.
|
| 4284 |
-
"eval_samples_per_second": 50.
|
| 4285 |
-
"eval_steps_per_second": 12.
|
| 4286 |
"num_input_tokens_seen": 3546880,
|
| 4287 |
"step": 2600
|
| 4288 |
},
|
|
@@ -4609,9 +4609,9 @@
|
|
| 4609 |
{
|
| 4610 |
"epoch": 8.642967542503865,
|
| 4611 |
"eval_loss": 0.5915025472640991,
|
| 4612 |
-
"eval_runtime": 11.
|
| 4613 |
-
"eval_samples_per_second": 50.
|
| 4614 |
-
"eval_steps_per_second": 12.
|
| 4615 |
"num_input_tokens_seen": 3821184,
|
| 4616 |
"step": 2800
|
| 4617 |
},
|
|
@@ -4938,9 +4938,9 @@
|
|
| 4938 |
{
|
| 4939 |
"epoch": 9.2596599690881,
|
| 4940 |
"eval_loss": 0.5584082007408142,
|
| 4941 |
-
"eval_runtime": 11.
|
| 4942 |
-
"eval_samples_per_second": 50.
|
| 4943 |
-
"eval_steps_per_second": 12.
|
| 4944 |
"num_input_tokens_seen": 4090704,
|
| 4945 |
"step": 3000
|
| 4946 |
},
|
|
@@ -5267,9 +5267,9 @@
|
|
| 5267 |
{
|
| 5268 |
"epoch": 9.87789799072643,
|
| 5269 |
"eval_loss": 0.562062680721283,
|
| 5270 |
-
"eval_runtime": 11.
|
| 5271 |
-
"eval_samples_per_second": 50.
|
| 5272 |
-
"eval_steps_per_second": 12.
|
| 5273 |
"num_input_tokens_seen": 4363696,
|
| 5274 |
"step": 3200
|
| 5275 |
},
|
|
@@ -5596,9 +5596,9 @@
|
|
| 5596 |
{
|
| 5597 |
"epoch": 10.494590417310665,
|
| 5598 |
"eval_loss": 0.5493518114089966,
|
| 5599 |
-
"eval_runtime": 11.
|
| 5600 |
-
"eval_samples_per_second": 50.
|
| 5601 |
-
"eval_steps_per_second": 12.
|
| 5602 |
"num_input_tokens_seen": 4636656,
|
| 5603 |
"step": 3400
|
| 5604 |
},
|
|
@@ -5925,9 +5925,9 @@
|
|
| 5925 |
{
|
| 5926 |
"epoch": 11.1112828438949,
|
| 5927 |
"eval_loss": 0.5832644701004028,
|
| 5928 |
-
"eval_runtime": 11.
|
| 5929 |
-
"eval_samples_per_second": 50.
|
| 5930 |
-
"eval_steps_per_second": 12.
|
| 5931 |
"num_input_tokens_seen": 4908928,
|
| 5932 |
"step": 3600
|
| 5933 |
},
|
|
@@ -6254,9 +6254,9 @@
|
|
| 6254 |
{
|
| 6255 |
"epoch": 11.72952086553323,
|
| 6256 |
"eval_loss": 0.5668447017669678,
|
| 6257 |
-
"eval_runtime": 11.
|
| 6258 |
-
"eval_samples_per_second": 50.
|
| 6259 |
-
"eval_steps_per_second": 12.
|
| 6260 |
"num_input_tokens_seen": 5179040,
|
| 6261 |
"step": 3800
|
| 6262 |
},
|
|
@@ -6583,9 +6583,9 @@
|
|
| 6583 |
{
|
| 6584 |
"epoch": 12.346213292117465,
|
| 6585 |
"eval_loss": 0.5749086737632751,
|
| 6586 |
-
"eval_runtime": 11.
|
| 6587 |
-
"eval_samples_per_second": 50.
|
| 6588 |
-
"eval_steps_per_second": 12.
|
| 6589 |
"num_input_tokens_seen": 5452192,
|
| 6590 |
"step": 4000
|
| 6591 |
},
|
|
@@ -6912,9 +6912,9 @@
|
|
| 6912 |
{
|
| 6913 |
"epoch": 12.964451313755795,
|
| 6914 |
"eval_loss": 0.564673900604248,
|
| 6915 |
-
"eval_runtime": 11.
|
| 6916 |
-
"eval_samples_per_second": 50.
|
| 6917 |
-
"eval_steps_per_second": 12.
|
| 6918 |
"num_input_tokens_seen": 5724448,
|
| 6919 |
"step": 4200
|
| 6920 |
},
|
|
@@ -7241,9 +7241,9 @@
|
|
| 7241 |
{
|
| 7242 |
"epoch": 13.58114374034003,
|
| 7243 |
"eval_loss": 0.557171642780304,
|
| 7244 |
-
"eval_runtime": 11.
|
| 7245 |
-
"eval_samples_per_second": 50.
|
| 7246 |
-
"eval_steps_per_second": 12.
|
| 7247 |
"num_input_tokens_seen": 5998032,
|
| 7248 |
"step": 4400
|
| 7249 |
},
|
|
@@ -7570,9 +7570,9 @@
|
|
| 7570 |
{
|
| 7571 |
"epoch": 14.197836166924265,
|
| 7572 |
"eval_loss": 0.5686624050140381,
|
| 7573 |
-
"eval_runtime": 11.
|
| 7574 |
-
"eval_samples_per_second": 50.
|
| 7575 |
-
"eval_steps_per_second": 12.
|
| 7576 |
"num_input_tokens_seen": 6269792,
|
| 7577 |
"step": 4600
|
| 7578 |
},
|
|
@@ -7899,9 +7899,9 @@
|
|
| 7899 |
{
|
| 7900 |
"epoch": 14.816074188562597,
|
| 7901 |
"eval_loss": 0.5626024603843689,
|
| 7902 |
-
"eval_runtime": 11.
|
| 7903 |
-
"eval_samples_per_second": 50.
|
| 7904 |
-
"eval_steps_per_second": 12.
|
| 7905 |
"num_input_tokens_seen": 6541248,
|
| 7906 |
"step": 4800
|
| 7907 |
},
|
|
@@ -8228,9 +8228,9 @@
|
|
| 8228 |
{
|
| 8229 |
"epoch": 15.432766615146832,
|
| 8230 |
"eval_loss": 0.5851988196372986,
|
| 8231 |
-
"eval_runtime": 11.
|
| 8232 |
-
"eval_samples_per_second": 50.
|
| 8233 |
-
"eval_steps_per_second": 12.
|
| 8234 |
"num_input_tokens_seen": 6815200,
|
| 8235 |
"step": 5000
|
| 8236 |
},
|
|
@@ -8557,9 +8557,9 @@
|
|
| 8557 |
{
|
| 8558 |
"epoch": 16.049459041731065,
|
| 8559 |
"eval_loss": 0.6189093589782715,
|
| 8560 |
-
"eval_runtime": 11.
|
| 8561 |
-
"eval_samples_per_second": 50.
|
| 8562 |
-
"eval_steps_per_second": 12.
|
| 8563 |
"num_input_tokens_seen": 7086224,
|
| 8564 |
"step": 5200
|
| 8565 |
},
|
|
@@ -8886,9 +8886,9 @@
|
|
| 8886 |
{
|
| 8887 |
"epoch": 16.667697063369395,
|
| 8888 |
"eval_loss": 0.6122633814811707,
|
| 8889 |
-
"eval_runtime": 11.
|
| 8890 |
-
"eval_samples_per_second": 50.
|
| 8891 |
-
"eval_steps_per_second": 12.
|
| 8892 |
"num_input_tokens_seen": 7360560,
|
| 8893 |
"step": 5400
|
| 8894 |
},
|
|
@@ -9215,9 +9215,9 @@
|
|
| 9215 |
{
|
| 9216 |
"epoch": 17.284389489953632,
|
| 9217 |
"eval_loss": 0.611182689666748,
|
| 9218 |
-
"eval_runtime": 11.
|
| 9219 |
-
"eval_samples_per_second": 50.
|
| 9220 |
-
"eval_steps_per_second": 12.
|
| 9221 |
"num_input_tokens_seen": 7632240,
|
| 9222 |
"step": 5600
|
| 9223 |
},
|
|
@@ -9544,9 +9544,9 @@
|
|
| 9544 |
{
|
| 9545 |
"epoch": 17.902627511591962,
|
| 9546 |
"eval_loss": 0.5843232274055481,
|
| 9547 |
-
"eval_runtime": 11.
|
| 9548 |
-
"eval_samples_per_second": 50.
|
| 9549 |
-
"eval_steps_per_second": 12.
|
| 9550 |
"num_input_tokens_seen": 7904432,
|
| 9551 |
"step": 5800
|
| 9552 |
},
|
|
@@ -9873,9 +9873,9 @@
|
|
| 9873 |
{
|
| 9874 |
"epoch": 18.5193199381762,
|
| 9875 |
"eval_loss": 0.6198561191558838,
|
| 9876 |
-
"eval_runtime": 11.
|
| 9877 |
-
"eval_samples_per_second": 50.
|
| 9878 |
-
"eval_steps_per_second": 12.
|
| 9879 |
"num_input_tokens_seen": 8177168,
|
| 9880 |
"step": 6000
|
| 9881 |
},
|
|
@@ -10202,9 +10202,9 @@
|
|
| 10202 |
{
|
| 10203 |
"epoch": 19.136012364760433,
|
| 10204 |
"eval_loss": 0.6794010996818542,
|
| 10205 |
-
"eval_runtime": 11.
|
| 10206 |
-
"eval_samples_per_second": 50.
|
| 10207 |
-
"eval_steps_per_second": 12.
|
| 10208 |
"num_input_tokens_seen": 8449968,
|
| 10209 |
"step": 6200
|
| 10210 |
},
|
|
@@ -10531,9 +10531,9 @@
|
|
| 10531 |
{
|
| 10532 |
"epoch": 19.754250386398763,
|
| 10533 |
"eval_loss": 0.6374606490135193,
|
| 10534 |
-
"eval_runtime": 11.
|
| 10535 |
-
"eval_samples_per_second": 50.
|
| 10536 |
-
"eval_steps_per_second": 12.
|
| 10537 |
"num_input_tokens_seen": 8722992,
|
| 10538 |
"step": 6400
|
| 10539 |
},
|
|
@@ -10860,9 +10860,9 @@
|
|
| 10860 |
{
|
| 10861 |
"epoch": 20.370942812983,
|
| 10862 |
"eval_loss": 0.6705669164657593,
|
| 10863 |
-
"eval_runtime": 11.
|
| 10864 |
-
"eval_samples_per_second": 50.
|
| 10865 |
-
"eval_steps_per_second": 12.
|
| 10866 |
"num_input_tokens_seen": 8996224,
|
| 10867 |
"step": 6600
|
| 10868 |
},
|
|
@@ -11189,9 +11189,9 @@
|
|
| 11189 |
{
|
| 11190 |
"epoch": 20.98918083462133,
|
| 11191 |
"eval_loss": 0.648054838180542,
|
| 11192 |
-
"eval_runtime": 11.
|
| 11193 |
-
"eval_samples_per_second": 50.
|
| 11194 |
-
"eval_steps_per_second": 12.
|
| 11195 |
"num_input_tokens_seen": 9269504,
|
| 11196 |
"step": 6800
|
| 11197 |
},
|
|
@@ -11518,9 +11518,9 @@
|
|
| 11518 |
{
|
| 11519 |
"epoch": 21.605873261205563,
|
| 11520 |
"eval_loss": 0.7299332618713379,
|
| 11521 |
-
"eval_runtime": 11.
|
| 11522 |
-
"eval_samples_per_second": 50.
|
| 11523 |
-
"eval_steps_per_second": 12.
|
| 11524 |
"num_input_tokens_seen": 9542432,
|
| 11525 |
"step": 7000
|
| 11526 |
},
|
|
@@ -11847,9 +11847,9 @@
|
|
| 11847 |
{
|
| 11848 |
"epoch": 22.2225656877898,
|
| 11849 |
"eval_loss": 0.7840644717216492,
|
| 11850 |
-
"eval_runtime": 11.
|
| 11851 |
-
"eval_samples_per_second": 50.
|
| 11852 |
-
"eval_steps_per_second": 12.
|
| 11853 |
"num_input_tokens_seen": 9812704,
|
| 11854 |
"step": 7200
|
| 11855 |
},
|
|
@@ -12176,9 +12176,9 @@
|
|
| 12176 |
{
|
| 12177 |
"epoch": 22.84080370942813,
|
| 12178 |
"eval_loss": 0.7381678819656372,
|
| 12179 |
-
"eval_runtime": 11.
|
| 12180 |
-
"eval_samples_per_second": 50.
|
| 12181 |
-
"eval_steps_per_second": 12.
|
| 12182 |
"num_input_tokens_seen": 10086272,
|
| 12183 |
"step": 7400
|
| 12184 |
},
|
|
@@ -12505,9 +12505,9 @@
|
|
| 12505 |
{
|
| 12506 |
"epoch": 23.457496136012363,
|
| 12507 |
"eval_loss": 0.7728149890899658,
|
| 12508 |
-
"eval_runtime": 11.
|
| 12509 |
-
"eval_samples_per_second": 50.
|
| 12510 |
-
"eval_steps_per_second": 12.
|
| 12511 |
"num_input_tokens_seen": 10358832,
|
| 12512 |
"step": 7600
|
| 12513 |
},
|
|
@@ -12834,9 +12834,9 @@
|
|
| 12834 |
{
|
| 12835 |
"epoch": 24.0741885625966,
|
| 12836 |
"eval_loss": 0.8268849849700928,
|
| 12837 |
-
"eval_runtime": 11.
|
| 12838 |
-
"eval_samples_per_second":
|
| 12839 |
-
"eval_steps_per_second": 12.
|
| 12840 |
"num_input_tokens_seen": 10630000,
|
| 12841 |
"step": 7800
|
| 12842 |
},
|
|
@@ -13163,9 +13163,9 @@
|
|
| 13163 |
{
|
| 13164 |
"epoch": 24.69242658423493,
|
| 13165 |
"eval_loss": 0.8175145983695984,
|
| 13166 |
-
"eval_runtime": 11.
|
| 13167 |
-
"eval_samples_per_second": 50.
|
| 13168 |
-
"eval_steps_per_second": 12.
|
| 13169 |
"num_input_tokens_seen": 10904880,
|
| 13170 |
"step": 8000
|
| 13171 |
},
|
|
@@ -13492,9 +13492,9 @@
|
|
| 13492 |
{
|
| 13493 |
"epoch": 25.309119010819167,
|
| 13494 |
"eval_loss": 0.8719689249992371,
|
| 13495 |
-
"eval_runtime": 11.
|
| 13496 |
-
"eval_samples_per_second": 50.
|
| 13497 |
-
"eval_steps_per_second": 12.
|
| 13498 |
"num_input_tokens_seen": 11176208,
|
| 13499 |
"step": 8200
|
| 13500 |
},
|
|
@@ -13821,9 +13821,9 @@
|
|
| 13821 |
{
|
| 13822 |
"epoch": 25.927357032457497,
|
| 13823 |
"eval_loss": 0.9041878581047058,
|
| 13824 |
-
"eval_runtime": 11.
|
| 13825 |
-
"eval_samples_per_second": 50.
|
| 13826 |
-
"eval_steps_per_second": 12.
|
| 13827 |
"num_input_tokens_seen": 11451344,
|
| 13828 |
"step": 8400
|
| 13829 |
},
|
|
@@ -14150,9 +14150,9 @@
|
|
| 14150 |
{
|
| 14151 |
"epoch": 26.54404945904173,
|
| 14152 |
"eval_loss": 0.8620166778564453,
|
| 14153 |
-
"eval_runtime": 11.
|
| 14154 |
-
"eval_samples_per_second": 50.
|
| 14155 |
-
"eval_steps_per_second": 12.
|
| 14156 |
"num_input_tokens_seen": 11723328,
|
| 14157 |
"step": 8600
|
| 14158 |
},
|
|
@@ -14479,9 +14479,9 @@
|
|
| 14479 |
{
|
| 14480 |
"epoch": 27.160741885625967,
|
| 14481 |
"eval_loss": 0.9756768345832825,
|
| 14482 |
-
"eval_runtime": 11.
|
| 14483 |
-
"eval_samples_per_second": 50.
|
| 14484 |
-
"eval_steps_per_second": 12.
|
| 14485 |
"num_input_tokens_seen": 11996224,
|
| 14486 |
"step": 8800
|
| 14487 |
},
|
|
@@ -14808,9 +14808,9 @@
|
|
| 14808 |
{
|
| 14809 |
"epoch": 27.778979907264297,
|
| 14810 |
"eval_loss": 0.9385554194450378,
|
| 14811 |
-
"eval_runtime": 11.
|
| 14812 |
-
"eval_samples_per_second": 50.
|
| 14813 |
-
"eval_steps_per_second": 12.
|
| 14814 |
"num_input_tokens_seen": 12267520,
|
| 14815 |
"step": 9000
|
| 14816 |
},
|
|
@@ -15137,9 +15137,9 @@
|
|
| 15137 |
{
|
| 15138 |
"epoch": 28.39567233384853,
|
| 15139 |
"eval_loss": 0.9237757921218872,
|
| 15140 |
-
"eval_runtime": 11.
|
| 15141 |
-
"eval_samples_per_second": 50.
|
| 15142 |
-
"eval_steps_per_second": 12.
|
| 15143 |
"num_input_tokens_seen": 12542064,
|
| 15144 |
"step": 9200
|
| 15145 |
},
|
|
@@ -15466,9 +15466,9 @@
|
|
| 15466 |
{
|
| 15467 |
"epoch": 29.012364760432767,
|
| 15468 |
"eval_loss": 1.064571499824524,
|
| 15469 |
-
"eval_runtime": 11.
|
| 15470 |
-
"eval_samples_per_second": 50.
|
| 15471 |
-
"eval_steps_per_second": 12.
|
| 15472 |
"num_input_tokens_seen": 12812048,
|
| 15473 |
"step": 9400
|
| 15474 |
},
|
|
@@ -15795,9 +15795,9 @@
|
|
| 15795 |
{
|
| 15796 |
"epoch": 29.630602782071097,
|
| 15797 |
"eval_loss": 1.0749653577804565,
|
| 15798 |
-
"eval_runtime": 11.
|
| 15799 |
-
"eval_samples_per_second": 50.
|
| 15800 |
-
"eval_steps_per_second": 12.
|
| 15801 |
"num_input_tokens_seen": 13085264,
|
| 15802 |
"step": 9600
|
| 15803 |
},
|
|
@@ -16124,9 +16124,9 @@
|
|
| 16124 |
{
|
| 16125 |
"epoch": 30.24729520865533,
|
| 16126 |
"eval_loss": 1.0077648162841797,
|
| 16127 |
-
"eval_runtime": 11.
|
| 16128 |
-
"eval_samples_per_second": 50.
|
| 16129 |
-
"eval_steps_per_second": 12.
|
| 16130 |
"num_input_tokens_seen": 13356384,
|
| 16131 |
"step": 9800
|
| 16132 |
},
|
|
@@ -16453,9 +16453,9 @@
|
|
| 16453 |
{
|
| 16454 |
"epoch": 30.865533230293664,
|
| 16455 |
"eval_loss": 1.057982325553894,
|
| 16456 |
-
"eval_runtime": 11.
|
| 16457 |
-
"eval_samples_per_second": 50.
|
| 16458 |
-
"eval_steps_per_second": 12.
|
| 16459 |
"num_input_tokens_seen": 13629216,
|
| 16460 |
"step": 10000
|
| 16461 |
},
|
|
@@ -16782,9 +16782,9 @@
|
|
| 16782 |
{
|
| 16783 |
"epoch": 31.482225656877898,
|
| 16784 |
"eval_loss": 1.0450738668441772,
|
| 16785 |
-
"eval_runtime": 11.
|
| 16786 |
-
"eval_samples_per_second": 50.
|
| 16787 |
-
"eval_steps_per_second": 12.
|
| 16788 |
"num_input_tokens_seen": 13902736,
|
| 16789 |
"step": 10200
|
| 16790 |
},
|
|
@@ -17111,9 +17111,9 @@
|
|
| 17111 |
{
|
| 17112 |
"epoch": 32.09891808346213,
|
| 17113 |
"eval_loss": 1.0477303266525269,
|
| 17114 |
-
"eval_runtime": 11.
|
| 17115 |
-
"eval_samples_per_second": 50.
|
| 17116 |
-
"eval_steps_per_second": 12.
|
| 17117 |
"num_input_tokens_seen": 14174192,
|
| 17118 |
"step": 10400
|
| 17119 |
},
|
|
@@ -17440,9 +17440,9 @@
|
|
| 17440 |
{
|
| 17441 |
"epoch": 32.717156105100464,
|
| 17442 |
"eval_loss": 1.14347243309021,
|
| 17443 |
-
"eval_runtime": 11.
|
| 17444 |
-
"eval_samples_per_second": 50.
|
| 17445 |
-
"eval_steps_per_second": 12.
|
| 17446 |
"num_input_tokens_seen": 14448176,
|
| 17447 |
"step": 10600
|
| 17448 |
},
|
|
@@ -17769,9 +17769,9 @@
|
|
| 17769 |
{
|
| 17770 |
"epoch": 33.3338485316847,
|
| 17771 |
"eval_loss": 1.0730254650115967,
|
| 17772 |
-
"eval_runtime": 11.
|
| 17773 |
-
"eval_samples_per_second": 50.
|
| 17774 |
-
"eval_steps_per_second": 12.
|
| 17775 |
"num_input_tokens_seen": 14718096,
|
| 17776 |
"step": 10800
|
| 17777 |
},
|
|
@@ -18098,9 +18098,9 @@
|
|
| 18098 |
{
|
| 18099 |
"epoch": 33.95208655332303,
|
| 18100 |
"eval_loss": 1.0351147651672363,
|
| 18101 |
-
"eval_runtime": 11.
|
| 18102 |
-
"eval_samples_per_second": 50.
|
| 18103 |
-
"eval_steps_per_second": 12.
|
| 18104 |
"num_input_tokens_seen": 14992048,
|
| 18105 |
"step": 11000
|
| 18106 |
},
|
|
@@ -18427,9 +18427,9 @@
|
|
| 18427 |
{
|
| 18428 |
"epoch": 34.568778979907265,
|
| 18429 |
"eval_loss": 1.1394553184509277,
|
| 18430 |
-
"eval_runtime": 11.
|
| 18431 |
-
"eval_samples_per_second": 50.
|
| 18432 |
-
"eval_steps_per_second": 12.
|
| 18433 |
"num_input_tokens_seen": 15265072,
|
| 18434 |
"step": 11200
|
| 18435 |
},
|
|
@@ -18756,9 +18756,9 @@
|
|
| 18756 |
{
|
| 18757 |
"epoch": 35.1854714064915,
|
| 18758 |
"eval_loss": 1.1201566457748413,
|
| 18759 |
-
"eval_runtime": 11.
|
| 18760 |
-
"eval_samples_per_second": 50.
|
| 18761 |
-
"eval_steps_per_second": 12.
|
| 18762 |
"num_input_tokens_seen": 15538960,
|
| 18763 |
"step": 11400
|
| 18764 |
},
|
|
@@ -19085,9 +19085,9 @@
|
|
| 19085 |
{
|
| 19086 |
"epoch": 35.80370942812983,
|
| 19087 |
"eval_loss": 1.133685827255249,
|
| 19088 |
-
"eval_runtime": 11.
|
| 19089 |
-
"eval_samples_per_second": 50.
|
| 19090 |
-
"eval_steps_per_second": 12.
|
| 19091 |
"num_input_tokens_seen": 15812880,
|
| 19092 |
"step": 11600
|
| 19093 |
},
|
|
@@ -19414,9 +19414,9 @@
|
|
| 19414 |
{
|
| 19415 |
"epoch": 36.420401854714065,
|
| 19416 |
"eval_loss": 1.1776589155197144,
|
| 19417 |
-
"eval_runtime": 11.
|
| 19418 |
-
"eval_samples_per_second": 50.
|
| 19419 |
-
"eval_steps_per_second": 12.
|
| 19420 |
"num_input_tokens_seen": 16082608,
|
| 19421 |
"step": 11800
|
| 19422 |
},
|
|
@@ -19743,9 +19743,9 @@
|
|
| 19743 |
{
|
| 19744 |
"epoch": 37.0370942812983,
|
| 19745 |
"eval_loss": 1.18972909450531,
|
| 19746 |
-
"eval_runtime": 11.
|
| 19747 |
-
"eval_samples_per_second": 50.
|
| 19748 |
-
"eval_steps_per_second": 12.
|
| 19749 |
"num_input_tokens_seen": 16357888,
|
| 19750 |
"step": 12000
|
| 19751 |
},
|
|
@@ -20072,9 +20072,9 @@
|
|
| 20072 |
{
|
| 20073 |
"epoch": 37.65533230293663,
|
| 20074 |
"eval_loss": 1.2221449613571167,
|
| 20075 |
-
"eval_runtime": 11.
|
| 20076 |
-
"eval_samples_per_second": 50.
|
| 20077 |
-
"eval_steps_per_second": 12.
|
| 20078 |
"num_input_tokens_seen": 16627872,
|
| 20079 |
"step": 12200
|
| 20080 |
},
|
|
@@ -20401,9 +20401,9 @@
|
|
| 20401 |
{
|
| 20402 |
"epoch": 38.272024729520865,
|
| 20403 |
"eval_loss": 1.169758677482605,
|
| 20404 |
-
"eval_runtime": 11.
|
| 20405 |
-
"eval_samples_per_second": 50.
|
| 20406 |
-
"eval_steps_per_second": 12.
|
| 20407 |
"num_input_tokens_seen": 16900336,
|
| 20408 |
"step": 12400
|
| 20409 |
},
|
|
@@ -20730,9 +20730,9 @@
|
|
| 20730 |
{
|
| 20731 |
"epoch": 38.8902627511592,
|
| 20732 |
"eval_loss": 1.1674479246139526,
|
| 20733 |
-
"eval_runtime": 11.
|
| 20734 |
-
"eval_samples_per_second": 50.
|
| 20735 |
-
"eval_steps_per_second": 12.
|
| 20736 |
"num_input_tokens_seen": 17175024,
|
| 20737 |
"step": 12600
|
| 20738 |
},
|
|
@@ -21059,9 +21059,9 @@
|
|
| 21059 |
{
|
| 21060 |
"epoch": 39.50695517774343,
|
| 21061 |
"eval_loss": 1.1664071083068848,
|
| 21062 |
-
"eval_runtime": 11.
|
| 21063 |
-
"eval_samples_per_second": 50.
|
| 21064 |
-
"eval_steps_per_second": 12.
|
| 21065 |
"num_input_tokens_seen": 17446864,
|
| 21066 |
"step": 12800
|
| 21067 |
},
|
|
@@ -21388,9 +21388,9 @@
|
|
| 21388 |
{
|
| 21389 |
"epoch": 40.123647604327665,
|
| 21390 |
"eval_loss": 1.2493196725845337,
|
| 21391 |
-
"eval_runtime": 11.
|
| 21392 |
-
"eval_samples_per_second": 50.
|
| 21393 |
-
"eval_steps_per_second": 12.
|
| 21394 |
"num_input_tokens_seen": 17716560,
|
| 21395 |
"step": 13000
|
| 21396 |
},
|
|
@@ -21717,9 +21717,9 @@
|
|
| 21717 |
{
|
| 21718 |
"epoch": 40.741885625966,
|
| 21719 |
"eval_loss": 1.3186978101730347,
|
| 21720 |
-
"eval_runtime": 11.
|
| 21721 |
-
"eval_samples_per_second": 50.
|
| 21722 |
-
"eval_steps_per_second": 12.
|
| 21723 |
"num_input_tokens_seen": 17991792,
|
| 21724 |
"step": 13200
|
| 21725 |
},
|
|
@@ -22046,9 +22046,9 @@
|
|
| 22046 |
{
|
| 22047 |
"epoch": 41.35857805255023,
|
| 22048 |
"eval_loss": 1.256793737411499,
|
| 22049 |
-
"eval_runtime": 11.
|
| 22050 |
-
"eval_samples_per_second": 50.
|
| 22051 |
-
"eval_steps_per_second": 12.
|
| 22052 |
"num_input_tokens_seen": 18262992,
|
| 22053 |
"step": 13400
|
| 22054 |
},
|
|
@@ -22375,9 +22375,9 @@
|
|
| 22375 |
{
|
| 22376 |
"epoch": 41.97681607418856,
|
| 22377 |
"eval_loss": 1.2447845935821533,
|
| 22378 |
-
"eval_runtime": 11.
|
| 22379 |
-
"eval_samples_per_second": 50.
|
| 22380 |
-
"eval_steps_per_second": 12.
|
| 22381 |
"num_input_tokens_seen": 18536880,
|
| 22382 |
"step": 13600
|
| 22383 |
},
|
|
@@ -22704,9 +22704,9 @@
|
|
| 22704 |
{
|
| 22705 |
"epoch": 42.5935085007728,
|
| 22706 |
"eval_loss": 1.233655333518982,
|
| 22707 |
-
"eval_runtime": 11.
|
| 22708 |
-
"eval_samples_per_second": 50.
|
| 22709 |
-
"eval_steps_per_second": 12.
|
| 22710 |
"num_input_tokens_seen": 18806784,
|
| 22711 |
"step": 13800
|
| 22712 |
},
|
|
@@ -23033,9 +23033,9 @@
|
|
| 23033 |
{
|
| 23034 |
"epoch": 43.210200927357036,
|
| 23035 |
"eval_loss": 1.254447102546692,
|
| 23036 |
-
"eval_runtime": 11.
|
| 23037 |
-
"eval_samples_per_second": 50.
|
| 23038 |
-
"eval_steps_per_second": 12.
|
| 23039 |
"num_input_tokens_seen": 19080608,
|
| 23040 |
"step": 14000
|
| 23041 |
},
|
|
@@ -23362,9 +23362,9 @@
|
|
| 23362 |
{
|
| 23363 |
"epoch": 43.82843894899536,
|
| 23364 |
"eval_loss": 1.3475619554519653,
|
| 23365 |
-
"eval_runtime": 11.
|
| 23366 |
-
"eval_samples_per_second": 50.
|
| 23367 |
-
"eval_steps_per_second": 12.
|
| 23368 |
"num_input_tokens_seen": 19352320,
|
| 23369 |
"step": 14200
|
| 23370 |
},
|
|
@@ -23691,9 +23691,9 @@
|
|
| 23691 |
{
|
| 23692 |
"epoch": 44.4451313755796,
|
| 23693 |
"eval_loss": 1.2956358194351196,
|
| 23694 |
-
"eval_runtime": 11.
|
| 23695 |
-
"eval_samples_per_second": 50.
|
| 23696 |
-
"eval_steps_per_second": 12.
|
| 23697 |
"num_input_tokens_seen": 19624544,
|
| 23698 |
"step": 14400
|
| 23699 |
},
|
|
@@ -24020,9 +24020,9 @@
|
|
| 24020 |
{
|
| 24021 |
"epoch": 45.061823802163836,
|
| 24022 |
"eval_loss": 1.214294195175171,
|
| 24023 |
-
"eval_runtime": 11.
|
| 24024 |
-
"eval_samples_per_second": 50.
|
| 24025 |
-
"eval_steps_per_second": 12.
|
| 24026 |
"num_input_tokens_seen": 19896064,
|
| 24027 |
"step": 14600
|
| 24028 |
},
|
|
@@ -24349,9 +24349,9 @@
|
|
| 24349 |
{
|
| 24350 |
"epoch": 45.68006182380216,
|
| 24351 |
"eval_loss": 1.200486660003662,
|
| 24352 |
-
"eval_runtime": 11.
|
| 24353 |
-
"eval_samples_per_second": 50.
|
| 24354 |
-
"eval_steps_per_second": 12.
|
| 24355 |
"num_input_tokens_seen": 20168064,
|
| 24356 |
"step": 14800
|
| 24357 |
},
|
|
@@ -24678,9 +24678,9 @@
|
|
| 24678 |
{
|
| 24679 |
"epoch": 46.2967542503864,
|
| 24680 |
"eval_loss": 1.3230748176574707,
|
| 24681 |
-
"eval_runtime": 11.
|
| 24682 |
-
"eval_samples_per_second": 50.
|
| 24683 |
-
"eval_steps_per_second": 12.
|
| 24684 |
"num_input_tokens_seen": 20440208,
|
| 24685 |
"step": 15000
|
| 24686 |
},
|
|
@@ -25007,9 +25007,9 @@
|
|
| 25007 |
{
|
| 25008 |
"epoch": 46.914992272024726,
|
| 25009 |
"eval_loss": 1.2638696432113647,
|
| 25010 |
-
"eval_runtime": 11.
|
| 25011 |
-
"eval_samples_per_second": 50.
|
| 25012 |
-
"eval_steps_per_second": 12.
|
| 25013 |
"num_input_tokens_seen": 20713296,
|
| 25014 |
"step": 15200
|
| 25015 |
},
|
|
@@ -25336,9 +25336,9 @@
|
|
| 25336 |
{
|
| 25337 |
"epoch": 47.53168469860896,
|
| 25338 |
"eval_loss": 1.3379755020141602,
|
| 25339 |
-
"eval_runtime": 11.
|
| 25340 |
-
"eval_samples_per_second": 50.
|
| 25341 |
-
"eval_steps_per_second": 12.
|
| 25342 |
"num_input_tokens_seen": 20985744,
|
| 25343 |
"step": 15400
|
| 25344 |
},
|
|
@@ -25665,9 +25665,9 @@
|
|
| 25665 |
{
|
| 25666 |
"epoch": 48.1483771251932,
|
| 25667 |
"eval_loss": 1.2503776550292969,
|
| 25668 |
-
"eval_runtime": 11.
|
| 25669 |
-
"eval_samples_per_second": 50.
|
| 25670 |
-
"eval_steps_per_second": 12.
|
| 25671 |
"num_input_tokens_seen": 21257920,
|
| 25672 |
"step": 15600
|
| 25673 |
},
|
|
@@ -25994,9 +25994,9 @@
|
|
| 25994 |
{
|
| 25995 |
"epoch": 48.76661514683153,
|
| 25996 |
"eval_loss": 1.2862586975097656,
|
| 25997 |
-
"eval_runtime": 11.
|
| 25998 |
-
"eval_samples_per_second": 50.
|
| 25999 |
-
"eval_steps_per_second": 12.
|
| 26000 |
"num_input_tokens_seen": 21529248,
|
| 26001 |
"step": 15800
|
| 26002 |
},
|
|
@@ -26323,9 +26323,9 @@
|
|
| 26323 |
{
|
| 26324 |
"epoch": 49.38330757341576,
|
| 26325 |
"eval_loss": 1.312309741973877,
|
| 26326 |
-
"eval_runtime": 11.
|
| 26327 |
-
"eval_samples_per_second": 50.
|
| 26328 |
-
"eval_steps_per_second": 12.
|
| 26329 |
"num_input_tokens_seen": 21800992,
|
| 26330 |
"step": 16000
|
| 26331 |
},
|
|
@@ -26652,9 +26652,9 @@
|
|
| 26652 |
{
|
| 26653 |
"epoch": 50.0,
|
| 26654 |
"eval_loss": 1.2966762781143188,
|
| 26655 |
-
"eval_runtime": 11.
|
| 26656 |
-
"eval_samples_per_second": 50.
|
| 26657 |
-
"eval_steps_per_second": 12.
|
| 26658 |
"num_input_tokens_seen": 22073392,
|
| 26659 |
"step": 16200
|
| 26660 |
},
|
|
@@ -26981,9 +26981,9 @@
|
|
| 26981 |
{
|
| 26982 |
"epoch": 50.618238021638334,
|
| 26983 |
"eval_loss": 1.3632538318634033,
|
| 26984 |
-
"eval_runtime": 11.
|
| 26985 |
-
"eval_samples_per_second": 50.
|
| 26986 |
-
"eval_steps_per_second": 12.
|
| 26987 |
"num_input_tokens_seen": 22345648,
|
| 26988 |
"step": 16400
|
| 26989 |
},
|
|
@@ -27310,9 +27310,9 @@
|
|
| 27310 |
{
|
| 27311 |
"epoch": 51.23493044822256,
|
| 27312 |
"eval_loss": 1.3670175075531006,
|
| 27313 |
-
"eval_runtime": 11.
|
| 27314 |
-
"eval_samples_per_second": 50.
|
| 27315 |
-
"eval_steps_per_second": 12.
|
| 27316 |
"num_input_tokens_seen": 22617984,
|
| 27317 |
"step": 16600
|
| 27318 |
},
|
|
@@ -27639,9 +27639,9 @@
|
|
| 27639 |
{
|
| 27640 |
"epoch": 51.8531684698609,
|
| 27641 |
"eval_loss": 1.3320527076721191,
|
| 27642 |
-
"eval_runtime": 11.
|
| 27643 |
-
"eval_samples_per_second": 50.
|
| 27644 |
-
"eval_steps_per_second": 12.
|
| 27645 |
"num_input_tokens_seen": 22892544,
|
| 27646 |
"step": 16800
|
| 27647 |
},
|
|
@@ -27968,9 +27968,9 @@
|
|
| 27968 |
{
|
| 27969 |
"epoch": 52.469860896445134,
|
| 27970 |
"eval_loss": 1.430206537246704,
|
| 27971 |
-
"eval_runtime": 11.
|
| 27972 |
-
"eval_samples_per_second": 50.
|
| 27973 |
-
"eval_steps_per_second": 12.
|
| 27974 |
"num_input_tokens_seen": 23163488,
|
| 27975 |
"step": 17000
|
| 27976 |
},
|
|
@@ -28297,9 +28297,9 @@
|
|
| 28297 |
{
|
| 28298 |
"epoch": 53.086553323029364,
|
| 28299 |
"eval_loss": 1.321289300918579,
|
| 28300 |
-
"eval_runtime": 11.
|
| 28301 |
-
"eval_samples_per_second": 50.
|
| 28302 |
-
"eval_steps_per_second": 12.
|
| 28303 |
"num_input_tokens_seen": 23438320,
|
| 28304 |
"step": 17200
|
| 28305 |
},
|
|
@@ -28626,9 +28626,9 @@
|
|
| 28626 |
{
|
| 28627 |
"epoch": 53.7047913446677,
|
| 28628 |
"eval_loss": 1.4570552110671997,
|
| 28629 |
-
"eval_runtime": 11.
|
| 28630 |
-
"eval_samples_per_second": 50.
|
| 28631 |
-
"eval_steps_per_second": 12.
|
| 28632 |
"num_input_tokens_seen": 23708720,
|
| 28633 |
"step": 17400
|
| 28634 |
},
|
|
@@ -28955,9 +28955,9 @@
|
|
| 28955 |
{
|
| 28956 |
"epoch": 54.321483771251934,
|
| 28957 |
"eval_loss": 1.3873727321624756,
|
| 28958 |
-
"eval_runtime": 11.
|
| 28959 |
-
"eval_samples_per_second": 50.
|
| 28960 |
-
"eval_steps_per_second": 12.
|
| 28961 |
"num_input_tokens_seen": 23984304,
|
| 28962 |
"step": 17600
|
| 28963 |
},
|
|
@@ -29284,9 +29284,9 @@
|
|
| 29284 |
{
|
| 29285 |
"epoch": 54.93972179289026,
|
| 29286 |
"eval_loss": 1.4202662706375122,
|
| 29287 |
-
"eval_runtime": 11.
|
| 29288 |
-
"eval_samples_per_second": 50.
|
| 29289 |
-
"eval_steps_per_second": 12.
|
| 29290 |
"num_input_tokens_seen": 24256368,
|
| 29291 |
"step": 17800
|
| 29292 |
},
|
|
@@ -29613,9 +29613,9 @@
|
|
| 29613 |
{
|
| 29614 |
"epoch": 55.5564142194745,
|
| 29615 |
"eval_loss": 1.3830780982971191,
|
| 29616 |
-
"eval_runtime": 11.
|
| 29617 |
-
"eval_samples_per_second": 50.
|
| 29618 |
-
"eval_steps_per_second": 12.
|
| 29619 |
"num_input_tokens_seen": 24527040,
|
| 29620 |
"step": 18000
|
| 29621 |
},
|
|
@@ -29942,9 +29942,9 @@
|
|
| 29942 |
{
|
| 29943 |
"epoch": 56.173106646058734,
|
| 29944 |
"eval_loss": 1.4858934879302979,
|
| 29945 |
-
"eval_runtime": 11.
|
| 29946 |
-
"eval_samples_per_second": 50.
|
| 29947 |
-
"eval_steps_per_second": 12.
|
| 29948 |
"num_input_tokens_seen": 24799312,
|
| 29949 |
"step": 18200
|
| 29950 |
},
|
|
@@ -30271,9 +30271,9 @@
|
|
| 30271 |
{
|
| 30272 |
"epoch": 56.79134466769706,
|
| 30273 |
"eval_loss": 1.5054408311843872,
|
| 30274 |
-
"eval_runtime": 11.
|
| 30275 |
-
"eval_samples_per_second": 50.
|
| 30276 |
-
"eval_steps_per_second": 12.
|
| 30277 |
"num_input_tokens_seen": 25072848,
|
| 30278 |
"step": 18400
|
| 30279 |
},
|
|
@@ -30600,9 +30600,9 @@
|
|
| 30600 |
{
|
| 30601 |
"epoch": 57.4080370942813,
|
| 30602 |
"eval_loss": 1.473268747329712,
|
| 30603 |
-
"eval_runtime": 11.
|
| 30604 |
-
"eval_samples_per_second": 50.
|
| 30605 |
-
"eval_steps_per_second": 12.
|
| 30606 |
"num_input_tokens_seen": 25347056,
|
| 30607 |
"step": 18600
|
| 30608 |
},
|
|
@@ -30929,9 +30929,9 @@
|
|
| 30929 |
{
|
| 30930 |
"epoch": 58.024729520865534,
|
| 30931 |
"eval_loss": 1.5095571279525757,
|
| 30932 |
-
"eval_runtime": 11.
|
| 30933 |
-
"eval_samples_per_second": 50.
|
| 30934 |
-
"eval_steps_per_second": 12.
|
| 30935 |
"num_input_tokens_seen": 25618400,
|
| 30936 |
"step": 18800
|
| 30937 |
},
|
|
@@ -31258,9 +31258,9 @@
|
|
| 31258 |
{
|
| 31259 |
"epoch": 58.64296754250386,
|
| 31260 |
"eval_loss": 1.3225481510162354,
|
| 31261 |
-
"eval_runtime": 11.
|
| 31262 |
-
"eval_samples_per_second": 50.
|
| 31263 |
-
"eval_steps_per_second": 12.
|
| 31264 |
"num_input_tokens_seen": 25892960,
|
| 31265 |
"step": 19000
|
| 31266 |
},
|
|
@@ -31587,9 +31587,9 @@
|
|
| 31587 |
{
|
| 31588 |
"epoch": 59.2596599690881,
|
| 31589 |
"eval_loss": 1.4172106981277466,
|
| 31590 |
-
"eval_runtime": 11.
|
| 31591 |
-
"eval_samples_per_second": 50.
|
| 31592 |
-
"eval_steps_per_second": 12.
|
| 31593 |
"num_input_tokens_seen": 26164688,
|
| 31594 |
"step": 19200
|
| 31595 |
},
|
|
@@ -31916,9 +31916,9 @@
|
|
| 31916 |
{
|
| 31917 |
"epoch": 59.87789799072643,
|
| 31918 |
"eval_loss": 1.35789155960083,
|
| 31919 |
-
"eval_runtime": 11.
|
| 31920 |
-
"eval_samples_per_second": 50.
|
| 31921 |
-
"eval_steps_per_second": 12.
|
| 31922 |
"num_input_tokens_seen": 26437392,
|
| 31923 |
"step": 19400
|
| 31924 |
},
|
|
@@ -32245,9 +32245,9 @@
|
|
| 32245 |
{
|
| 32246 |
"epoch": 60.49459041731066,
|
| 32247 |
"eval_loss": 1.406263828277588,
|
| 32248 |
-
"eval_runtime":
|
| 32249 |
-
"eval_samples_per_second":
|
| 32250 |
-
"eval_steps_per_second":
|
| 32251 |
"num_input_tokens_seen": 26710176,
|
| 32252 |
"step": 19600
|
| 32253 |
},
|
|
@@ -32574,9 +32574,9 @@
|
|
| 32574 |
{
|
| 32575 |
"epoch": 61.1112828438949,
|
| 32576 |
"eval_loss": 1.4265893697738647,
|
| 32577 |
-
"eval_runtime": 11.
|
| 32578 |
-
"eval_samples_per_second": 50.
|
| 32579 |
-
"eval_steps_per_second": 12.
|
| 32580 |
"num_input_tokens_seen": 26981728,
|
| 32581 |
"step": 19800
|
| 32582 |
},
|
|
@@ -32903,9 +32903,9 @@
|
|
| 32903 |
{
|
| 32904 |
"epoch": 61.72952086553323,
|
| 32905 |
"eval_loss": 1.3551362752914429,
|
| 32906 |
-
"eval_runtime": 11.
|
| 32907 |
-
"eval_samples_per_second": 50.
|
| 32908 |
-
"eval_steps_per_second": 12.
|
| 32909 |
"num_input_tokens_seen": 27253632,
|
| 32910 |
"step": 20000
|
| 32911 |
},
|
|
@@ -33232,9 +33232,9 @@
|
|
| 33232 |
{
|
| 33233 |
"epoch": 62.34621329211747,
|
| 33234 |
"eval_loss": 1.4743679761886597,
|
| 33235 |
-
"eval_runtime": 11.
|
| 33236 |
-
"eval_samples_per_second": 50.
|
| 33237 |
-
"eval_steps_per_second": 12.
|
| 33238 |
"num_input_tokens_seen": 27524928,
|
| 33239 |
"step": 20200
|
| 33240 |
},
|
|
@@ -33561,9 +33561,9 @@
|
|
| 33561 |
{
|
| 33562 |
"epoch": 62.964451313755795,
|
| 33563 |
"eval_loss": 1.5115978717803955,
|
| 33564 |
-
"eval_runtime": 11.
|
| 33565 |
-
"eval_samples_per_second": 50.
|
| 33566 |
-
"eval_steps_per_second": 12.
|
| 33567 |
"num_input_tokens_seen": 27799712,
|
| 33568 |
"step": 20400
|
| 33569 |
},
|
|
@@ -33890,9 +33890,9 @@
|
|
| 33890 |
{
|
| 33891 |
"epoch": 63.58114374034003,
|
| 33892 |
"eval_loss": 1.5977118015289307,
|
| 33893 |
-
"eval_runtime": 11.
|
| 33894 |
-
"eval_samples_per_second": 50.
|
| 33895 |
-
"eval_steps_per_second": 12.
|
| 33896 |
"num_input_tokens_seen": 28071024,
|
| 33897 |
"step": 20600
|
| 33898 |
},
|
|
@@ -34219,9 +34219,9 @@
|
|
| 34219 |
{
|
| 34220 |
"epoch": 64.19783616692426,
|
| 34221 |
"eval_loss": 1.5763089656829834,
|
| 34222 |
-
"eval_runtime": 11.
|
| 34223 |
-
"eval_samples_per_second": 50.
|
| 34224 |
-
"eval_steps_per_second": 12.
|
| 34225 |
"num_input_tokens_seen": 28342880,
|
| 34226 |
"step": 20800
|
| 34227 |
},
|
|
@@ -34548,9 +34548,9 @@
|
|
| 34548 |
{
|
| 34549 |
"epoch": 64.8160741885626,
|
| 34550 |
"eval_loss": 1.6289054155349731,
|
| 34551 |
-
"eval_runtime": 11.
|
| 34552 |
-
"eval_samples_per_second": 50.
|
| 34553 |
-
"eval_steps_per_second": 12.
|
| 34554 |
"num_input_tokens_seen": 28617696,
|
| 34555 |
"step": 21000
|
| 34556 |
},
|
|
@@ -34877,9 +34877,9 @@
|
|
| 34877 |
{
|
| 34878 |
"epoch": 65.43276661514683,
|
| 34879 |
"eval_loss": 1.6688075065612793,
|
| 34880 |
-
"eval_runtime": 11.
|
| 34881 |
-
"eval_samples_per_second": 50.
|
| 34882 |
-
"eval_steps_per_second": 12.
|
| 34883 |
"num_input_tokens_seen": 28888112,
|
| 34884 |
"step": 21200
|
| 34885 |
},
|
|
@@ -35206,9 +35206,9 @@
|
|
| 35206 |
{
|
| 35207 |
"epoch": 66.04945904173107,
|
| 35208 |
"eval_loss": 1.6155662536621094,
|
| 35209 |
-
"eval_runtime": 11.
|
| 35210 |
-
"eval_samples_per_second": 50.
|
| 35211 |
-
"eval_steps_per_second": 12.
|
| 35212 |
"num_input_tokens_seen": 29162944,
|
| 35213 |
"step": 21400
|
| 35214 |
},
|
|
@@ -35535,9 +35535,9 @@
|
|
| 35535 |
{
|
| 35536 |
"epoch": 66.6676970633694,
|
| 35537 |
"eval_loss": 1.6828913688659668,
|
| 35538 |
-
"eval_runtime": 11.
|
| 35539 |
-
"eval_samples_per_second": 50.
|
| 35540 |
-
"eval_steps_per_second": 12.
|
| 35541 |
"num_input_tokens_seen": 29434784,
|
| 35542 |
"step": 21600
|
| 35543 |
},
|
|
@@ -35864,9 +35864,9 @@
|
|
| 35864 |
{
|
| 35865 |
"epoch": 67.28438948995363,
|
| 35866 |
"eval_loss": 1.6700409650802612,
|
| 35867 |
-
"eval_runtime": 11.
|
| 35868 |
-
"eval_samples_per_second": 50.
|
| 35869 |
-
"eval_steps_per_second": 12.
|
| 35870 |
"num_input_tokens_seen": 29706800,
|
| 35871 |
"step": 21800
|
| 35872 |
},
|
|
@@ -36193,9 +36193,9 @@
|
|
| 36193 |
{
|
| 36194 |
"epoch": 67.90262751159196,
|
| 36195 |
"eval_loss": 1.6916232109069824,
|
| 36196 |
-
"eval_runtime": 11.
|
| 36197 |
-
"eval_samples_per_second": 50.
|
| 36198 |
-
"eval_steps_per_second": 12.
|
| 36199 |
"num_input_tokens_seen": 29980240,
|
| 36200 |
"step": 22000
|
| 36201 |
},
|
|
@@ -36522,9 +36522,9 @@
|
|
| 36522 |
{
|
| 36523 |
"epoch": 68.5193199381762,
|
| 36524 |
"eval_loss": 1.7332632541656494,
|
| 36525 |
-
"eval_runtime": 11.
|
| 36526 |
-
"eval_samples_per_second": 50.
|
| 36527 |
-
"eval_steps_per_second": 12.
|
| 36528 |
"num_input_tokens_seen": 30250192,
|
| 36529 |
"step": 22200
|
| 36530 |
},
|
|
@@ -36851,9 +36851,9 @@
|
|
| 36851 |
{
|
| 36852 |
"epoch": 69.13601236476043,
|
| 36853 |
"eval_loss": 1.7388529777526855,
|
| 36854 |
-
"eval_runtime": 11.
|
| 36855 |
-
"eval_samples_per_second": 50.
|
| 36856 |
-
"eval_steps_per_second": 12.
|
| 36857 |
"num_input_tokens_seen": 30522672,
|
| 36858 |
"step": 22400
|
| 36859 |
},
|
|
@@ -37180,9 +37180,9 @@
|
|
| 37180 |
{
|
| 37181 |
"epoch": 69.75425038639877,
|
| 37182 |
"eval_loss": 1.7202584743499756,
|
| 37183 |
-
"eval_runtime": 11.
|
| 37184 |
-
"eval_samples_per_second": 50.
|
| 37185 |
-
"eval_steps_per_second": 12.
|
| 37186 |
"num_input_tokens_seen": 30795024,
|
| 37187 |
"step": 22600
|
| 37188 |
},
|
|
@@ -37509,9 +37509,9 @@
|
|
| 37509 |
{
|
| 37510 |
"epoch": 70.370942812983,
|
| 37511 |
"eval_loss": 1.7700324058532715,
|
| 37512 |
-
"eval_runtime": 11.
|
| 37513 |
-
"eval_samples_per_second": 50.
|
| 37514 |
-
"eval_steps_per_second": 12.
|
| 37515 |
"num_input_tokens_seen": 31066544,
|
| 37516 |
"step": 22800
|
| 37517 |
},
|
|
@@ -37838,9 +37838,9 @@
|
|
| 37838 |
{
|
| 37839 |
"epoch": 70.98918083462132,
|
| 37840 |
"eval_loss": 1.769662857055664,
|
| 37841 |
-
"eval_runtime": 11.
|
| 37842 |
-
"eval_samples_per_second": 50.
|
| 37843 |
-
"eval_steps_per_second": 12.
|
| 37844 |
"num_input_tokens_seen": 31338128,
|
| 37845 |
"step": 23000
|
| 37846 |
},
|
|
@@ -38167,9 +38167,9 @@
|
|
| 38167 |
{
|
| 38168 |
"epoch": 71.60587326120556,
|
| 38169 |
"eval_loss": 1.8099125623703003,
|
| 38170 |
-
"eval_runtime": 11.
|
| 38171 |
-
"eval_samples_per_second": 50.
|
| 38172 |
-
"eval_steps_per_second": 12.
|
| 38173 |
"num_input_tokens_seen": 31609104,
|
| 38174 |
"step": 23200
|
| 38175 |
},
|
|
@@ -38496,9 +38496,9 @@
|
|
| 38496 |
{
|
| 38497 |
"epoch": 72.2225656877898,
|
| 38498 |
"eval_loss": 1.856191873550415,
|
| 38499 |
-
"eval_runtime": 11.
|
| 38500 |
-
"eval_samples_per_second": 50.
|
| 38501 |
-
"eval_steps_per_second": 12.
|
| 38502 |
"num_input_tokens_seen": 31881424,
|
| 38503 |
"step": 23400
|
| 38504 |
},
|
|
@@ -38825,9 +38825,9 @@
|
|
| 38825 |
{
|
| 38826 |
"epoch": 72.84080370942813,
|
| 38827 |
"eval_loss": 1.7837176322937012,
|
| 38828 |
-
"eval_runtime": 11.
|
| 38829 |
-
"eval_samples_per_second": 50.
|
| 38830 |
-
"eval_steps_per_second": 12.
|
| 38831 |
"num_input_tokens_seen": 32155024,
|
| 38832 |
"step": 23600
|
| 38833 |
},
|
|
@@ -39154,9 +39154,9 @@
|
|
| 39154 |
{
|
| 39155 |
"epoch": 73.45749613601237,
|
| 39156 |
"eval_loss": 1.8125648498535156,
|
| 39157 |
-
"eval_runtime": 11.
|
| 39158 |
-
"eval_samples_per_second": 50.
|
| 39159 |
-
"eval_steps_per_second": 12.
|
| 39160 |
"num_input_tokens_seen": 32425312,
|
| 39161 |
"step": 23800
|
| 39162 |
},
|
|
@@ -39483,9 +39483,9 @@
|
|
| 39483 |
{
|
| 39484 |
"epoch": 74.0741885625966,
|
| 39485 |
"eval_loss": 1.8575142621994019,
|
| 39486 |
-
"eval_runtime": 11.
|
| 39487 |
-
"eval_samples_per_second": 50.
|
| 39488 |
-
"eval_steps_per_second": 12.
|
| 39489 |
"num_input_tokens_seen": 32698784,
|
| 39490 |
"step": 24000
|
| 39491 |
},
|
|
@@ -39812,9 +39812,9 @@
|
|
| 39812 |
{
|
| 39813 |
"epoch": 74.69242658423494,
|
| 39814 |
"eval_loss": 1.8753187656402588,
|
| 39815 |
-
"eval_runtime": 11.
|
| 39816 |
-
"eval_samples_per_second": 50.
|
| 39817 |
-
"eval_steps_per_second": 12.
|
| 39818 |
"num_input_tokens_seen": 32974144,
|
| 39819 |
"step": 24200
|
| 39820 |
},
|
|
@@ -40141,9 +40141,9 @@
|
|
| 40141 |
{
|
| 40142 |
"epoch": 75.30911901081916,
|
| 40143 |
"eval_loss": 1.9167370796203613,
|
| 40144 |
-
"eval_runtime": 11.
|
| 40145 |
-
"eval_samples_per_second": 50.
|
| 40146 |
-
"eval_steps_per_second": 12.
|
| 40147 |
"num_input_tokens_seen": 33245216,
|
| 40148 |
"step": 24400
|
| 40149 |
},
|
|
@@ -40470,9 +40470,9 @@
|
|
| 40470 |
{
|
| 40471 |
"epoch": 75.9273570324575,
|
| 40472 |
"eval_loss": 1.1968048810958862,
|
| 40473 |
-
"eval_runtime": 11.
|
| 40474 |
-
"eval_samples_per_second": 50.
|
| 40475 |
-
"eval_steps_per_second": 12.
|
| 40476 |
"num_input_tokens_seen": 33517088,
|
| 40477 |
"step": 24600
|
| 40478 |
},
|
|
@@ -40799,9 +40799,9 @@
|
|
| 40799 |
{
|
| 40800 |
"epoch": 76.54404945904173,
|
| 40801 |
"eval_loss": 1.3781951665878296,
|
| 40802 |
-
"eval_runtime": 11.
|
| 40803 |
-
"eval_samples_per_second": 50.
|
| 40804 |
-
"eval_steps_per_second": 12.
|
| 40805 |
"num_input_tokens_seen": 33788432,
|
| 40806 |
"step": 24800
|
| 40807 |
},
|
|
@@ -41128,9 +41128,9 @@
|
|
| 41128 |
{
|
| 41129 |
"epoch": 77.16074188562597,
|
| 41130 |
"eval_loss": 1.5010449886322021,
|
| 41131 |
-
"eval_runtime": 11.
|
| 41132 |
-
"eval_samples_per_second": 50.
|
| 41133 |
-
"eval_steps_per_second": 12.
|
| 41134 |
"num_input_tokens_seen": 34060416,
|
| 41135 |
"step": 25000
|
| 41136 |
},
|
|
@@ -41457,9 +41457,9 @@
|
|
| 41457 |
{
|
| 41458 |
"epoch": 77.7789799072643,
|
| 41459 |
"eval_loss": 1.5149627923965454,
|
| 41460 |
-
"eval_runtime": 11.
|
| 41461 |
-
"eval_samples_per_second": 50.
|
| 41462 |
-
"eval_steps_per_second": 12.
|
| 41463 |
"num_input_tokens_seen": 34333408,
|
| 41464 |
"step": 25200
|
| 41465 |
},
|
|
@@ -41786,9 +41786,9 @@
|
|
| 41786 |
{
|
| 41787 |
"epoch": 78.39567233384854,
|
| 41788 |
"eval_loss": 1.6160272359848022,
|
| 41789 |
-
"eval_runtime": 11.
|
| 41790 |
-
"eval_samples_per_second": 50.
|
| 41791 |
-
"eval_steps_per_second": 12.
|
| 41792 |
"num_input_tokens_seen": 34605392,
|
| 41793 |
"step": 25400
|
| 41794 |
},
|
|
@@ -42115,9 +42115,9 @@
|
|
| 42115 |
{
|
| 42116 |
"epoch": 79.01236476043276,
|
| 42117 |
"eval_loss": 1.5819573402404785,
|
| 42118 |
-
"eval_runtime": 11.
|
| 42119 |
-
"eval_samples_per_second": 50.
|
| 42120 |
-
"eval_steps_per_second": 12.
|
| 42121 |
"num_input_tokens_seen": 34879536,
|
| 42122 |
"step": 25600
|
| 42123 |
},
|
|
@@ -42444,9 +42444,9 @@
|
|
| 42444 |
{
|
| 42445 |
"epoch": 79.6306027820711,
|
| 42446 |
"eval_loss": 1.651304841041565,
|
| 42447 |
-
"eval_runtime": 11.
|
| 42448 |
-
"eval_samples_per_second": 50.
|
| 42449 |
-
"eval_steps_per_second": 12.
|
| 42450 |
"num_input_tokens_seen": 35153488,
|
| 42451 |
"step": 25800
|
| 42452 |
},
|
|
@@ -42773,9 +42773,9 @@
|
|
| 42773 |
{
|
| 42774 |
"epoch": 80.24729520865533,
|
| 42775 |
"eval_loss": 1.6964157819747925,
|
| 42776 |
-
"eval_runtime": 11.
|
| 42777 |
-
"eval_samples_per_second": 50.
|
| 42778 |
-
"eval_steps_per_second": 12.
|
| 42779 |
"num_input_tokens_seen": 35424912,
|
| 42780 |
"step": 26000
|
| 42781 |
},
|
|
@@ -43102,9 +43102,9 @@
|
|
| 43102 |
{
|
| 43103 |
"epoch": 80.86553323029366,
|
| 43104 |
"eval_loss": 1.7483088970184326,
|
| 43105 |
-
"eval_runtime": 11.
|
| 43106 |
-
"eval_samples_per_second": 50.
|
| 43107 |
-
"eval_steps_per_second": 12.
|
| 43108 |
"num_input_tokens_seen": 35698064,
|
| 43109 |
"step": 26200
|
| 43110 |
},
|
|
@@ -43431,9 +43431,9 @@
|
|
| 43431 |
{
|
| 43432 |
"epoch": 81.4822256568779,
|
| 43433 |
"eval_loss": 1.7370902299880981,
|
| 43434 |
-
"eval_runtime": 11.
|
| 43435 |
-
"eval_samples_per_second": 50.
|
| 43436 |
-
"eval_steps_per_second": 12.
|
| 43437 |
"num_input_tokens_seen": 35968160,
|
| 43438 |
"step": 26400
|
| 43439 |
},
|
|
@@ -43760,9 +43760,9 @@
|
|
| 43760 |
{
|
| 43761 |
"epoch": 82.09891808346214,
|
| 43762 |
"eval_loss": 1.7790963649749756,
|
| 43763 |
-
"eval_runtime": 11.
|
| 43764 |
-
"eval_samples_per_second": 50.
|
| 43765 |
-
"eval_steps_per_second": 12.
|
| 43766 |
"num_input_tokens_seen": 36240928,
|
| 43767 |
"step": 26600
|
| 43768 |
},
|
|
@@ -44089,9 +44089,9 @@
|
|
| 44089 |
{
|
| 44090 |
"epoch": 82.71715610510046,
|
| 44091 |
"eval_loss": 1.772797703742981,
|
| 44092 |
-
"eval_runtime": 11.
|
| 44093 |
-
"eval_samples_per_second": 50.
|
| 44094 |
-
"eval_steps_per_second": 12.
|
| 44095 |
"num_input_tokens_seen": 36514208,
|
| 44096 |
"step": 26800
|
| 44097 |
},
|
|
@@ -44418,9 +44418,9 @@
|
|
| 44418 |
{
|
| 44419 |
"epoch": 83.3338485316847,
|
| 44420 |
"eval_loss": 1.7722996473312378,
|
| 44421 |
-
"eval_runtime": 11.
|
| 44422 |
-
"eval_samples_per_second": 50.
|
| 44423 |
-
"eval_steps_per_second": 12.
|
| 44424 |
"num_input_tokens_seen": 36785136,
|
| 44425 |
"step": 27000
|
| 44426 |
},
|
|
@@ -44747,9 +44747,9 @@
|
|
| 44747 |
{
|
| 44748 |
"epoch": 83.95208655332303,
|
| 44749 |
"eval_loss": 1.8002300262451172,
|
| 44750 |
-
"eval_runtime": 11.
|
| 44751 |
-
"eval_samples_per_second": 50.
|
| 44752 |
-
"eval_steps_per_second": 12.
|
| 44753 |
"num_input_tokens_seen": 37061648,
|
| 44754 |
"step": 27200
|
| 44755 |
},
|
|
@@ -45076,9 +45076,9 @@
|
|
| 45076 |
{
|
| 45077 |
"epoch": 84.56877897990726,
|
| 45078 |
"eval_loss": 1.804320216178894,
|
| 45079 |
-
"eval_runtime": 11.
|
| 45080 |
-
"eval_samples_per_second": 50.
|
| 45081 |
-
"eval_steps_per_second": 12.
|
| 45082 |
"num_input_tokens_seen": 37333648,
|
| 45083 |
"step": 27400
|
| 45084 |
},
|
|
@@ -45405,9 +45405,9 @@
|
|
| 45405 |
{
|
| 45406 |
"epoch": 85.1854714064915,
|
| 45407 |
"eval_loss": 1.8354555368423462,
|
| 45408 |
-
"eval_runtime": 11.
|
| 45409 |
-
"eval_samples_per_second": 50.
|
| 45410 |
-
"eval_steps_per_second": 12.
|
| 45411 |
"num_input_tokens_seen": 37605184,
|
| 45412 |
"step": 27600
|
| 45413 |
},
|
|
@@ -45734,9 +45734,9 @@
|
|
| 45734 |
{
|
| 45735 |
"epoch": 85.80370942812984,
|
| 45736 |
"eval_loss": 1.8400607109069824,
|
| 45737 |
-
"eval_runtime": 11.
|
| 45738 |
-
"eval_samples_per_second": 50.
|
| 45739 |
-
"eval_steps_per_second": 12.
|
| 45740 |
"num_input_tokens_seen": 37875360,
|
| 45741 |
"step": 27800
|
| 45742 |
},
|
|
@@ -46063,9 +46063,9 @@
|
|
| 46063 |
{
|
| 46064 |
"epoch": 86.42040185471407,
|
| 46065 |
"eval_loss": 1.8688201904296875,
|
| 46066 |
-
"eval_runtime": 11.
|
| 46067 |
-
"eval_samples_per_second": 50.
|
| 46068 |
-
"eval_steps_per_second": 12.
|
| 46069 |
"num_input_tokens_seen": 38150208,
|
| 46070 |
"step": 28000
|
| 46071 |
},
|
|
@@ -46392,9 +46392,9 @@
|
|
| 46392 |
{
|
| 46393 |
"epoch": 87.0370942812983,
|
| 46394 |
"eval_loss": 1.810387134552002,
|
| 46395 |
-
"eval_runtime": 11.
|
| 46396 |
-
"eval_samples_per_second": 50.
|
| 46397 |
-
"eval_steps_per_second": 12.
|
| 46398 |
"num_input_tokens_seen": 38422048,
|
| 46399 |
"step": 28200
|
| 46400 |
},
|
|
@@ -46721,9 +46721,9 @@
|
|
| 46721 |
{
|
| 46722 |
"epoch": 87.65533230293663,
|
| 46723 |
"eval_loss": 1.8730015754699707,
|
| 46724 |
-
"eval_runtime": 11.
|
| 46725 |
-
"eval_samples_per_second": 50.
|
| 46726 |
-
"eval_steps_per_second": 12.
|
| 46727 |
"num_input_tokens_seen": 38692224,
|
| 46728 |
"step": 28400
|
| 46729 |
},
|
|
@@ -47050,9 +47050,9 @@
|
|
| 47050 |
{
|
| 47051 |
"epoch": 88.27202472952087,
|
| 47052 |
"eval_loss": 1.8786824941635132,
|
| 47053 |
-
"eval_runtime": 11.
|
| 47054 |
-
"eval_samples_per_second": 50.
|
| 47055 |
-
"eval_steps_per_second": 12.
|
| 47056 |
"num_input_tokens_seen": 38964176,
|
| 47057 |
"step": 28600
|
| 47058 |
},
|
|
@@ -47379,9 +47379,9 @@
|
|
| 47379 |
{
|
| 47380 |
"epoch": 88.8902627511592,
|
| 47381 |
"eval_loss": 1.8849008083343506,
|
| 47382 |
-
"eval_runtime": 11.
|
| 47383 |
-
"eval_samples_per_second": 50.
|
| 47384 |
-
"eval_steps_per_second": 12.
|
| 47385 |
"num_input_tokens_seen": 39235184,
|
| 47386 |
"step": 28800
|
| 47387 |
},
|
|
@@ -47708,9 +47708,9 @@
|
|
| 47708 |
{
|
| 47709 |
"epoch": 89.50695517774344,
|
| 47710 |
"eval_loss": 1.9232840538024902,
|
| 47711 |
-
"eval_runtime": 11.
|
| 47712 |
-
"eval_samples_per_second": 50.
|
| 47713 |
-
"eval_steps_per_second": 12.
|
| 47714 |
"num_input_tokens_seen": 39507520,
|
| 47715 |
"step": 29000
|
| 47716 |
},
|
|
@@ -48037,9 +48037,9 @@
|
|
| 48037 |
{
|
| 48038 |
"epoch": 90.12364760432767,
|
| 48039 |
"eval_loss": 1.9127227067947388,
|
| 48040 |
-
"eval_runtime": 11.
|
| 48041 |
-
"eval_samples_per_second": 50.
|
| 48042 |
-
"eval_steps_per_second": 12.
|
| 48043 |
"num_input_tokens_seen": 39779328,
|
| 48044 |
"step": 29200
|
| 48045 |
},
|
|
@@ -48366,9 +48366,9 @@
|
|
| 48366 |
{
|
| 48367 |
"epoch": 90.74188562596599,
|
| 48368 |
"eval_loss": 1.8981382846832275,
|
| 48369 |
-
"eval_runtime": 11.
|
| 48370 |
-
"eval_samples_per_second": 50.
|
| 48371 |
-
"eval_steps_per_second": 12.
|
| 48372 |
"num_input_tokens_seen": 40051520,
|
| 48373 |
"step": 29400
|
| 48374 |
},
|
|
@@ -48695,9 +48695,9 @@
|
|
| 48695 |
{
|
| 48696 |
"epoch": 91.35857805255023,
|
| 48697 |
"eval_loss": 1.9302953481674194,
|
| 48698 |
-
"eval_runtime": 11.
|
| 48699 |
-
"eval_samples_per_second": 50.
|
| 48700 |
-
"eval_steps_per_second": 12.
|
| 48701 |
"num_input_tokens_seen": 40322576,
|
| 48702 |
"step": 29600
|
| 48703 |
},
|
|
@@ -49024,9 +49024,9 @@
|
|
| 49024 |
{
|
| 49025 |
"epoch": 91.97681607418856,
|
| 49026 |
"eval_loss": 1.9179975986480713,
|
| 49027 |
-
"eval_runtime": 11.
|
| 49028 |
-
"eval_samples_per_second": 50.
|
| 49029 |
-
"eval_steps_per_second": 12.
|
| 49030 |
"num_input_tokens_seen": 40596016,
|
| 49031 |
"step": 29800
|
| 49032 |
},
|
|
@@ -49353,9 +49353,9 @@
|
|
| 49353 |
{
|
| 49354 |
"epoch": 92.5935085007728,
|
| 49355 |
"eval_loss": 1.92044997215271,
|
| 49356 |
-
"eval_runtime": 11.
|
| 49357 |
-
"eval_samples_per_second": 50.
|
| 49358 |
-
"eval_steps_per_second": 12.
|
| 49359 |
"num_input_tokens_seen": 40867568,
|
| 49360 |
"step": 30000
|
| 49361 |
},
|
|
@@ -49682,9 +49682,9 @@
|
|
| 49682 |
{
|
| 49683 |
"epoch": 93.21020092735704,
|
| 49684 |
"eval_loss": 1.9711647033691406,
|
| 49685 |
-
"eval_runtime": 11.
|
| 49686 |
-
"eval_samples_per_second": 50.
|
| 49687 |
-
"eval_steps_per_second": 12.
|
| 49688 |
"num_input_tokens_seen": 41140848,
|
| 49689 |
"step": 30200
|
| 49690 |
},
|
|
@@ -50011,9 +50011,9 @@
|
|
| 50011 |
{
|
| 50012 |
"epoch": 93.82843894899537,
|
| 50013 |
"eval_loss": 1.976061463356018,
|
| 50014 |
-
"eval_runtime": 11.
|
| 50015 |
-
"eval_samples_per_second": 50.
|
| 50016 |
-
"eval_steps_per_second": 12.
|
| 50017 |
"num_input_tokens_seen": 41412848,
|
| 50018 |
"step": 30400
|
| 50019 |
},
|
|
@@ -50340,9 +50340,9 @@
|
|
| 50340 |
{
|
| 50341 |
"epoch": 94.44513137557959,
|
| 50342 |
"eval_loss": 1.958508849143982,
|
| 50343 |
-
"eval_runtime": 11.
|
| 50344 |
-
"eval_samples_per_second": 50.
|
| 50345 |
-
"eval_steps_per_second": 12.
|
| 50346 |
"num_input_tokens_seen": 41683920,
|
| 50347 |
"step": 30600
|
| 50348 |
},
|
|
@@ -50669,9 +50669,9 @@
|
|
| 50669 |
{
|
| 50670 |
"epoch": 95.06182380216383,
|
| 50671 |
"eval_loss": 1.9966574907302856,
|
| 50672 |
-
"eval_runtime": 11.
|
| 50673 |
-
"eval_samples_per_second": 50.
|
| 50674 |
-
"eval_steps_per_second": 12.
|
| 50675 |
"num_input_tokens_seen": 41959008,
|
| 50676 |
"step": 30800
|
| 50677 |
},
|
|
@@ -50998,9 +50998,9 @@
|
|
| 50998 |
{
|
| 50999 |
"epoch": 95.68006182380216,
|
| 51000 |
"eval_loss": 1.9950237274169922,
|
| 51001 |
-
"eval_runtime": 11.
|
| 51002 |
-
"eval_samples_per_second": 50.
|
| 51003 |
-
"eval_steps_per_second": 12.
|
| 51004 |
"num_input_tokens_seen": 42231520,
|
| 51005 |
"step": 31000
|
| 51006 |
},
|
|
@@ -51327,9 +51327,9 @@
|
|
| 51327 |
{
|
| 51328 |
"epoch": 96.2967542503864,
|
| 51329 |
"eval_loss": 1.9839365482330322,
|
| 51330 |
-
"eval_runtime": 11.
|
| 51331 |
-
"eval_samples_per_second": 50.
|
| 51332 |
-
"eval_steps_per_second": 12.
|
| 51333 |
"num_input_tokens_seen": 42502416,
|
| 51334 |
"step": 31200
|
| 51335 |
},
|
|
@@ -51656,9 +51656,9 @@
|
|
| 51656 |
{
|
| 51657 |
"epoch": 96.91499227202473,
|
| 51658 |
"eval_loss": 2.004136323928833,
|
| 51659 |
-
"eval_runtime": 11.
|
| 51660 |
-
"eval_samples_per_second": 50.
|
| 51661 |
-
"eval_steps_per_second": 12.
|
| 51662 |
"num_input_tokens_seen": 42776304,
|
| 51663 |
"step": 31400
|
| 51664 |
},
|
|
@@ -51985,9 +51985,9 @@
|
|
| 51985 |
{
|
| 51986 |
"epoch": 97.53168469860897,
|
| 51987 |
"eval_loss": 2.016206979751587,
|
| 51988 |
-
"eval_runtime": 11.
|
| 51989 |
-
"eval_samples_per_second": 50.
|
| 51990 |
-
"eval_steps_per_second": 12.
|
| 51991 |
"num_input_tokens_seen": 43048176,
|
| 51992 |
"step": 31600
|
| 51993 |
},
|
|
@@ -52314,9 +52314,9 @@
|
|
| 52314 |
{
|
| 52315 |
"epoch": 98.14837712519319,
|
| 52316 |
"eval_loss": 2.010310173034668,
|
| 52317 |
-
"eval_runtime": 11.
|
| 52318 |
-
"eval_samples_per_second": 50.
|
| 52319 |
-
"eval_steps_per_second": 12.
|
| 52320 |
"num_input_tokens_seen": 43320144,
|
| 52321 |
"step": 31800
|
| 52322 |
},
|
|
@@ -52643,9 +52643,9 @@
|
|
| 52643 |
{
|
| 52644 |
"epoch": 98.76661514683153,
|
| 52645 |
"eval_loss": 2.008124589920044,
|
| 52646 |
-
"eval_runtime": 11.
|
| 52647 |
-
"eval_samples_per_second": 50.
|
| 52648 |
-
"eval_steps_per_second": 12.
|
| 52649 |
"num_input_tokens_seen": 43591728,
|
| 52650 |
"step": 32000
|
| 52651 |
},
|
|
@@ -52972,9 +52972,9 @@
|
|
| 52972 |
{
|
| 52973 |
"epoch": 99.38330757341576,
|
| 52974 |
"eval_loss": 2.027338981628418,
|
| 52975 |
-
"eval_runtime": 11.
|
| 52976 |
-
"eval_samples_per_second": 50.
|
| 52977 |
-
"eval_steps_per_second": 12.
|
| 52978 |
"num_input_tokens_seen": 43866048,
|
| 52979 |
"step": 32200
|
| 52980 |
},
|
|
@@ -53301,9 +53301,9 @@
|
|
| 53301 |
{
|
| 53302 |
"epoch": 100.0,
|
| 53303 |
"eval_loss": 2.034688711166382,
|
| 53304 |
-
"eval_runtime": 11.
|
| 53305 |
-
"eval_samples_per_second": 50.
|
| 53306 |
-
"eval_steps_per_second": 12.
|
| 53307 |
"num_input_tokens_seen": 44137040,
|
| 53308 |
"step": 32400
|
| 53309 |
},
|
|
@@ -53630,9 +53630,9 @@
|
|
| 53630 |
{
|
| 53631 |
"epoch": 100.61823802163833,
|
| 53632 |
"eval_loss": 2.052443265914917,
|
| 53633 |
-
"eval_runtime": 11.
|
| 53634 |
-
"eval_samples_per_second": 50.
|
| 53635 |
-
"eval_steps_per_second": 12.
|
| 53636 |
"num_input_tokens_seen": 44408848,
|
| 53637 |
"step": 32600
|
| 53638 |
},
|
|
@@ -53959,9 +53959,9 @@
|
|
| 53959 |
{
|
| 53960 |
"epoch": 101.23493044822257,
|
| 53961 |
"eval_loss": 2.067155599594116,
|
| 53962 |
-
"eval_runtime": 11.
|
| 53963 |
-
"eval_samples_per_second": 50.
|
| 53964 |
-
"eval_steps_per_second": 12.
|
| 53965 |
"num_input_tokens_seen": 44682912,
|
| 53966 |
"step": 32800
|
| 53967 |
},
|
|
@@ -54288,9 +54288,9 @@
|
|
| 54288 |
{
|
| 54289 |
"epoch": 101.85316846986089,
|
| 54290 |
"eval_loss": 2.042910575866699,
|
| 54291 |
-
"eval_runtime": 11.
|
| 54292 |
-
"eval_samples_per_second": 50.
|
| 54293 |
-
"eval_steps_per_second": 12.
|
| 54294 |
"num_input_tokens_seen": 44956000,
|
| 54295 |
"step": 33000
|
| 54296 |
},
|
|
@@ -54617,9 +54617,9 @@
|
|
| 54617 |
{
|
| 54618 |
"epoch": 102.46986089644513,
|
| 54619 |
"eval_loss": 2.049968719482422,
|
| 54620 |
-
"eval_runtime": 11.
|
| 54621 |
-
"eval_samples_per_second": 50.
|
| 54622 |
-
"eval_steps_per_second": 12.
|
| 54623 |
"num_input_tokens_seen": 45227824,
|
| 54624 |
"step": 33200
|
| 54625 |
},
|
|
@@ -54946,9 +54946,9 @@
|
|
| 54946 |
{
|
| 54947 |
"epoch": 103.08655332302936,
|
| 54948 |
"eval_loss": 2.047577142715454,
|
| 54949 |
-
"eval_runtime": 11.
|
| 54950 |
-
"eval_samples_per_second": 50.
|
| 54951 |
-
"eval_steps_per_second": 12.
|
| 54952 |
"num_input_tokens_seen": 45498320,
|
| 54953 |
"step": 33400
|
| 54954 |
},
|
|
@@ -55275,9 +55275,9 @@
|
|
| 55275 |
{
|
| 55276 |
"epoch": 103.7047913446677,
|
| 55277 |
"eval_loss": 2.0636227130889893,
|
| 55278 |
-
"eval_runtime": 11.
|
| 55279 |
-
"eval_samples_per_second": 50.
|
| 55280 |
-
"eval_steps_per_second": 12.
|
| 55281 |
"num_input_tokens_seen": 45773648,
|
| 55282 |
"step": 33600
|
| 55283 |
},
|
|
@@ -55604,9 +55604,9 @@
|
|
| 55604 |
{
|
| 55605 |
"epoch": 104.32148377125193,
|
| 55606 |
"eval_loss": 2.080799102783203,
|
| 55607 |
-
"eval_runtime": 11.
|
| 55608 |
-
"eval_samples_per_second": 50.
|
| 55609 |
-
"eval_steps_per_second": 12.
|
| 55610 |
"num_input_tokens_seen": 46044128,
|
| 55611 |
"step": 33800
|
| 55612 |
},
|
|
@@ -55933,9 +55933,9 @@
|
|
| 55933 |
{
|
| 55934 |
"epoch": 104.93972179289027,
|
| 55935 |
"eval_loss": 2.0720911026000977,
|
| 55936 |
-
"eval_runtime": 11.
|
| 55937 |
-
"eval_samples_per_second": 50.
|
| 55938 |
-
"eval_steps_per_second": 12.
|
| 55939 |
"num_input_tokens_seen": 46317504,
|
| 55940 |
"step": 34000
|
| 55941 |
},
|
|
@@ -56262,9 +56262,9 @@
|
|
| 56262 |
{
|
| 56263 |
"epoch": 105.5564142194745,
|
| 56264 |
"eval_loss": 2.082965612411499,
|
| 56265 |
-
"eval_runtime": 11.
|
| 56266 |
-
"eval_samples_per_second": 50.
|
| 56267 |
-
"eval_steps_per_second": 12.
|
| 56268 |
"num_input_tokens_seen": 46589024,
|
| 56269 |
"step": 34200
|
| 56270 |
},
|
|
@@ -56591,9 +56591,9 @@
|
|
| 56591 |
{
|
| 56592 |
"epoch": 106.17310664605873,
|
| 56593 |
"eval_loss": 2.094463348388672,
|
| 56594 |
-
"eval_runtime": 11.
|
| 56595 |
-
"eval_samples_per_second": 50.
|
| 56596 |
-
"eval_steps_per_second": 12.
|
| 56597 |
"num_input_tokens_seen": 46863680,
|
| 56598 |
"step": 34400
|
| 56599 |
},
|
|
@@ -56920,9 +56920,9 @@
|
|
| 56920 |
{
|
| 56921 |
"epoch": 106.79134466769706,
|
| 56922 |
"eval_loss": 2.0966665744781494,
|
| 56923 |
-
"eval_runtime": 11.
|
| 56924 |
-
"eval_samples_per_second": 50.
|
| 56925 |
-
"eval_steps_per_second": 12.
|
| 56926 |
"num_input_tokens_seen": 47135520,
|
| 56927 |
"step": 34600
|
| 56928 |
},
|
|
@@ -57249,9 +57249,9 @@
|
|
| 57249 |
{
|
| 57250 |
"epoch": 107.4080370942813,
|
| 57251 |
"eval_loss": 2.1042280197143555,
|
| 57252 |
-
"eval_runtime": 11.
|
| 57253 |
-
"eval_samples_per_second": 50.
|
| 57254 |
-
"eval_steps_per_second": 12.
|
| 57255 |
"num_input_tokens_seen": 47407056,
|
| 57256 |
"step": 34800
|
| 57257 |
},
|
|
@@ -57578,9 +57578,9 @@
|
|
| 57578 |
{
|
| 57579 |
"epoch": 108.02472952086553,
|
| 57580 |
"eval_loss": 2.096859931945801,
|
| 57581 |
-
"eval_runtime": 11.
|
| 57582 |
-
"eval_samples_per_second": 50.
|
| 57583 |
-
"eval_steps_per_second": 12.
|
| 57584 |
"num_input_tokens_seen": 47680112,
|
| 57585 |
"step": 35000
|
| 57586 |
},
|
|
@@ -57907,9 +57907,9 @@
|
|
| 57907 |
{
|
| 57908 |
"epoch": 108.64296754250387,
|
| 57909 |
"eval_loss": 2.1074352264404297,
|
| 57910 |
-
"eval_runtime": 11.
|
| 57911 |
-
"eval_samples_per_second": 50.
|
| 57912 |
-
"eval_steps_per_second": 12.
|
| 57913 |
"num_input_tokens_seen": 47951632,
|
| 57914 |
"step": 35200
|
| 57915 |
},
|
|
@@ -58236,9 +58236,9 @@
|
|
| 58236 |
{
|
| 58237 |
"epoch": 109.2596599690881,
|
| 58238 |
"eval_loss": 2.110261917114258,
|
| 58239 |
-
"eval_runtime": 11.
|
| 58240 |
-
"eval_samples_per_second": 50.
|
| 58241 |
-
"eval_steps_per_second": 12.
|
| 58242 |
"num_input_tokens_seen": 48224016,
|
| 58243 |
"step": 35400
|
| 58244 |
},
|
|
@@ -58565,9 +58565,9 @@
|
|
| 58565 |
{
|
| 58566 |
"epoch": 109.87789799072642,
|
| 58567 |
"eval_loss": 2.1071767807006836,
|
| 58568 |
-
"eval_runtime": 11.
|
| 58569 |
-
"eval_samples_per_second": 50.
|
| 58570 |
-
"eval_steps_per_second": 12.
|
| 58571 |
"num_input_tokens_seen": 48497072,
|
| 58572 |
"step": 35600
|
| 58573 |
},
|
|
@@ -58894,9 +58894,9 @@
|
|
| 58894 |
{
|
| 58895 |
"epoch": 110.49459041731066,
|
| 58896 |
"eval_loss": 2.1081290245056152,
|
| 58897 |
-
"eval_runtime": 11.
|
| 58898 |
-
"eval_samples_per_second": 50.
|
| 58899 |
-
"eval_steps_per_second": 12.
|
| 58900 |
"num_input_tokens_seen": 48768624,
|
| 58901 |
"step": 35800
|
| 58902 |
},
|
|
@@ -59223,9 +59223,9 @@
|
|
| 59223 |
{
|
| 59224 |
"epoch": 111.1112828438949,
|
| 59225 |
"eval_loss": 2.1115777492523193,
|
| 59226 |
-
"eval_runtime": 11.
|
| 59227 |
-
"eval_samples_per_second": 50.
|
| 59228 |
-
"eval_steps_per_second": 12.
|
| 59229 |
"num_input_tokens_seen": 49041488,
|
| 59230 |
"step": 36000
|
| 59231 |
},
|
|
@@ -59552,9 +59552,9 @@
|
|
| 59552 |
{
|
| 59553 |
"epoch": 111.72952086553323,
|
| 59554 |
"eval_loss": 2.1243085861206055,
|
| 59555 |
-
"eval_runtime": 11.
|
| 59556 |
-
"eval_samples_per_second": 50.
|
| 59557 |
-
"eval_steps_per_second": 12.
|
| 59558 |
"num_input_tokens_seen": 49314352,
|
| 59559 |
"step": 36200
|
| 59560 |
},
|
|
@@ -59881,9 +59881,9 @@
|
|
| 59881 |
{
|
| 59882 |
"epoch": 112.34621329211747,
|
| 59883 |
"eval_loss": 2.1214993000030518,
|
| 59884 |
-
"eval_runtime": 11.
|
| 59885 |
-
"eval_samples_per_second": 50.
|
| 59886 |
-
"eval_steps_per_second": 12.
|
| 59887 |
"num_input_tokens_seen": 49584848,
|
| 59888 |
"step": 36400
|
| 59889 |
},
|
|
@@ -60210,9 +60210,9 @@
|
|
| 60210 |
{
|
| 60211 |
"epoch": 112.9644513137558,
|
| 60212 |
"eval_loss": 2.1198999881744385,
|
| 60213 |
-
"eval_runtime": 11.
|
| 60214 |
-
"eval_samples_per_second": 50.
|
| 60215 |
-
"eval_steps_per_second": 12.
|
| 60216 |
"num_input_tokens_seen": 49858864,
|
| 60217 |
"step": 36600
|
| 60218 |
},
|
|
@@ -60539,9 +60539,9 @@
|
|
| 60539 |
{
|
| 60540 |
"epoch": 113.58114374034002,
|
| 60541 |
"eval_loss": 2.129167079925537,
|
| 60542 |
-
"eval_runtime": 11.
|
| 60543 |
-
"eval_samples_per_second": 50.
|
| 60544 |
-
"eval_steps_per_second": 12.
|
| 60545 |
"num_input_tokens_seen": 50130000,
|
| 60546 |
"step": 36800
|
| 60547 |
},
|
|
@@ -60868,9 +60868,9 @@
|
|
| 60868 |
{
|
| 60869 |
"epoch": 114.19783616692426,
|
| 60870 |
"eval_loss": 2.127554178237915,
|
| 60871 |
-
"eval_runtime": 11.
|
| 60872 |
-
"eval_samples_per_second": 50.
|
| 60873 |
-
"eval_steps_per_second": 12.
|
| 60874 |
"num_input_tokens_seen": 50404128,
|
| 60875 |
"step": 37000
|
| 60876 |
},
|
|
@@ -61197,9 +61197,9 @@
|
|
| 61197 |
{
|
| 61198 |
"epoch": 114.8160741885626,
|
| 61199 |
"eval_loss": 2.1345906257629395,
|
| 61200 |
-
"eval_runtime": 11.
|
| 61201 |
-
"eval_samples_per_second": 50.
|
| 61202 |
-
"eval_steps_per_second": 12.
|
| 61203 |
"num_input_tokens_seen": 50678112,
|
| 61204 |
"step": 37200
|
| 61205 |
},
|
|
@@ -61526,9 +61526,9 @@
|
|
| 61526 |
{
|
| 61527 |
"epoch": 115.43276661514683,
|
| 61528 |
"eval_loss": 2.132272481918335,
|
| 61529 |
-
"eval_runtime": 11.
|
| 61530 |
-
"eval_samples_per_second": 50.
|
| 61531 |
-
"eval_steps_per_second": 12.
|
| 61532 |
"num_input_tokens_seen": 50946800,
|
| 61533 |
"step": 37400
|
| 61534 |
},
|
|
@@ -61855,9 +61855,9 @@
|
|
| 61855 |
{
|
| 61856 |
"epoch": 116.04945904173107,
|
| 61857 |
"eval_loss": 2.131873369216919,
|
| 61858 |
-
"eval_runtime": 11.
|
| 61859 |
-
"eval_samples_per_second": 50.
|
| 61860 |
-
"eval_steps_per_second": 12.
|
| 61861 |
"num_input_tokens_seen": 51219680,
|
| 61862 |
"step": 37600
|
| 61863 |
},
|
|
@@ -62184,9 +62184,9 @@
|
|
| 62184 |
{
|
| 62185 |
"epoch": 116.6676970633694,
|
| 62186 |
"eval_loss": 2.1323955059051514,
|
| 62187 |
-
"eval_runtime": 11.
|
| 62188 |
-
"eval_samples_per_second": 50.
|
| 62189 |
-
"eval_steps_per_second": 12.
|
| 62190 |
"num_input_tokens_seen": 51492544,
|
| 62191 |
"step": 37800
|
| 62192 |
},
|
|
@@ -62513,9 +62513,9 @@
|
|
| 62513 |
{
|
| 62514 |
"epoch": 117.28438948995363,
|
| 62515 |
"eval_loss": 2.135064125061035,
|
| 62516 |
-
"eval_runtime": 11.
|
| 62517 |
-
"eval_samples_per_second":
|
| 62518 |
-
"eval_steps_per_second": 12.
|
| 62519 |
"num_input_tokens_seen": 51764160,
|
| 62520 |
"step": 38000
|
| 62521 |
},
|
|
@@ -62842,9 +62842,9 @@
|
|
| 62842 |
{
|
| 62843 |
"epoch": 117.90262751159196,
|
| 62844 |
"eval_loss": 2.134946584701538,
|
| 62845 |
-
"eval_runtime": 11.
|
| 62846 |
-
"eval_samples_per_second":
|
| 62847 |
-
"eval_steps_per_second": 12.
|
| 62848 |
"num_input_tokens_seen": 52039488,
|
| 62849 |
"step": 38200
|
| 62850 |
},
|
|
@@ -63171,9 +63171,9 @@
|
|
| 63171 |
{
|
| 63172 |
"epoch": 118.5193199381762,
|
| 63173 |
"eval_loss": 2.1382298469543457,
|
| 63174 |
-
"eval_runtime": 11.
|
| 63175 |
-
"eval_samples_per_second":
|
| 63176 |
-
"eval_steps_per_second": 12.
|
| 63177 |
"num_input_tokens_seen": 52311648,
|
| 63178 |
"step": 38400
|
| 63179 |
},
|
|
@@ -63500,9 +63500,9 @@
|
|
| 63500 |
{
|
| 63501 |
"epoch": 119.13601236476043,
|
| 63502 |
"eval_loss": 2.1389503479003906,
|
| 63503 |
-
"eval_runtime": 11.
|
| 63504 |
-
"eval_samples_per_second":
|
| 63505 |
-
"eval_steps_per_second": 12.
|
| 63506 |
"num_input_tokens_seen": 52584960,
|
| 63507 |
"step": 38600
|
| 63508 |
},
|
|
@@ -63829,9 +63829,9 @@
|
|
| 63829 |
{
|
| 63830 |
"epoch": 119.75425038639877,
|
| 63831 |
"eval_loss": 2.141028642654419,
|
| 63832 |
-
"eval_runtime": 11.
|
| 63833 |
-
"eval_samples_per_second":
|
| 63834 |
-
"eval_steps_per_second": 12.
|
| 63835 |
"num_input_tokens_seen": 52855712,
|
| 63836 |
"step": 38800
|
| 63837 |
},
|
|
@@ -64158,9 +64158,9 @@
|
|
| 64158 |
{
|
| 64159 |
"epoch": 120.370942812983,
|
| 64160 |
"eval_loss": 2.142845630645752,
|
| 64161 |
-
"eval_runtime":
|
| 64162 |
-
"eval_samples_per_second":
|
| 64163 |
-
"eval_steps_per_second":
|
| 64164 |
"num_input_tokens_seen": 53128480,
|
| 64165 |
"step": 39000
|
| 64166 |
},
|
|
@@ -64487,9 +64487,9 @@
|
|
| 64487 |
{
|
| 64488 |
"epoch": 120.98918083462132,
|
| 64489 |
"eval_loss": 2.142850637435913,
|
| 64490 |
-
"eval_runtime": 11.
|
| 64491 |
-
"eval_samples_per_second":
|
| 64492 |
-
"eval_steps_per_second": 12.
|
| 64493 |
"num_input_tokens_seen": 53401056,
|
| 64494 |
"step": 39200
|
| 64495 |
},
|
|
@@ -64816,9 +64816,9 @@
|
|
| 64816 |
{
|
| 64817 |
"epoch": 121.60587326120556,
|
| 64818 |
"eval_loss": 2.141234874725342,
|
| 64819 |
-
"eval_runtime": 11.
|
| 64820 |
-
"eval_samples_per_second":
|
| 64821 |
-
"eval_steps_per_second": 12.
|
| 64822 |
"num_input_tokens_seen": 53673600,
|
| 64823 |
"step": 39400
|
| 64824 |
},
|
|
@@ -65145,9 +65145,9 @@
|
|
| 65145 |
{
|
| 65146 |
"epoch": 122.2225656877898,
|
| 65147 |
"eval_loss": 2.1376492977142334,
|
| 65148 |
-
"eval_runtime": 11.
|
| 65149 |
-
"eval_samples_per_second":
|
| 65150 |
-
"eval_steps_per_second": 12.
|
| 65151 |
"num_input_tokens_seen": 53943712,
|
| 65152 |
"step": 39600
|
| 65153 |
},
|
|
@@ -65474,9 +65474,9 @@
|
|
| 65474 |
{
|
| 65475 |
"epoch": 122.84080370942813,
|
| 65476 |
"eval_loss": 2.1381330490112305,
|
| 65477 |
-
"eval_runtime": 11.
|
| 65478 |
-
"eval_samples_per_second":
|
| 65479 |
-
"eval_steps_per_second": 12.
|
| 65480 |
"num_input_tokens_seen": 54217344,
|
| 65481 |
"step": 39800
|
| 65482 |
},
|
|
@@ -65803,9 +65803,9 @@
|
|
| 65803 |
{
|
| 65804 |
"epoch": 123.45749613601237,
|
| 65805 |
"eval_loss": 2.137033700942993,
|
| 65806 |
-
"eval_runtime": 11.
|
| 65807 |
-
"eval_samples_per_second": 50.
|
| 65808 |
-
"eval_steps_per_second": 12.
|
| 65809 |
"num_input_tokens_seen": 54490336,
|
| 65810 |
"step": 40000
|
| 65811 |
},
|
|
@@ -65815,9 +65815,9 @@
|
|
| 65815 |
"step": 40000,
|
| 65816 |
"total_flos": 2.453675202191819e+18,
|
| 65817 |
"train_loss": 0.10362623064493919,
|
| 65818 |
-
"train_runtime":
|
| 65819 |
-
"train_samples_per_second": 21.
|
| 65820 |
-
"train_steps_per_second": 1.
|
| 65821 |
}
|
| 65822 |
],
|
| 65823 |
"logging_steps": 5,
|
|
|
|
| 332 |
{
|
| 333 |
"epoch": 0.6182380216383307,
|
| 334 |
"eval_loss": 0.9577658176422119,
|
| 335 |
+
"eval_runtime": 11.2706,
|
| 336 |
+
"eval_samples_per_second": 51.018,
|
| 337 |
+
"eval_steps_per_second": 12.777,
|
| 338 |
"num_input_tokens_seen": 272576,
|
| 339 |
"step": 200
|
| 340 |
},
|
|
|
|
| 661 |
{
|
| 662 |
"epoch": 1.2349304482225656,
|
| 663 |
"eval_loss": 0.7184381484985352,
|
| 664 |
+
"eval_runtime": 11.278,
|
| 665 |
+
"eval_samples_per_second": 50.984,
|
| 666 |
+
"eval_steps_per_second": 12.768,
|
| 667 |
"num_input_tokens_seen": 544096,
|
| 668 |
"step": 400
|
| 669 |
},
|
|
|
|
| 990 |
{
|
| 991 |
"epoch": 1.8531684698608966,
|
| 992 |
"eval_loss": 0.6815493106842041,
|
| 993 |
+
"eval_runtime": 11.2856,
|
| 994 |
+
"eval_samples_per_second": 50.95,
|
| 995 |
+
"eval_steps_per_second": 12.76,
|
| 996 |
"num_input_tokens_seen": 818048,
|
| 997 |
"step": 600
|
| 998 |
},
|
|
|
|
| 1319 |
{
|
| 1320 |
"epoch": 2.469860896445131,
|
| 1321 |
"eval_loss": 0.6753404140472412,
|
| 1322 |
+
"eval_runtime": 11.2983,
|
| 1323 |
+
"eval_samples_per_second": 50.893,
|
| 1324 |
+
"eval_steps_per_second": 12.745,
|
| 1325 |
"num_input_tokens_seen": 1089600,
|
| 1326 |
"step": 800
|
| 1327 |
},
|
|
|
|
| 1648 |
{
|
| 1649 |
"epoch": 3.0865533230293662,
|
| 1650 |
"eval_loss": 0.6587666273117065,
|
| 1651 |
+
"eval_runtime": 11.289,
|
| 1652 |
+
"eval_samples_per_second": 50.935,
|
| 1653 |
+
"eval_steps_per_second": 12.756,
|
| 1654 |
"num_input_tokens_seen": 1361504,
|
| 1655 |
"step": 1000
|
| 1656 |
},
|
|
|
|
| 1977 |
{
|
| 1978 |
"epoch": 3.704791344667697,
|
| 1979 |
"eval_loss": 0.6507958173751831,
|
| 1980 |
+
"eval_runtime": 11.2821,
|
| 1981 |
+
"eval_samples_per_second": 50.966,
|
| 1982 |
+
"eval_steps_per_second": 12.764,
|
| 1983 |
"num_input_tokens_seen": 1636960,
|
| 1984 |
"step": 1200
|
| 1985 |
},
|
|
|
|
| 2306 |
{
|
| 2307 |
"epoch": 4.321483771251932,
|
| 2308 |
"eval_loss": 0.6580312252044678,
|
| 2309 |
+
"eval_runtime": 11.2952,
|
| 2310 |
+
"eval_samples_per_second": 50.907,
|
| 2311 |
+
"eval_steps_per_second": 12.749,
|
| 2312 |
"num_input_tokens_seen": 1909696,
|
| 2313 |
"step": 1400
|
| 2314 |
},
|
|
|
|
| 2635 |
{
|
| 2636 |
"epoch": 4.939721792890262,
|
| 2637 |
"eval_loss": 0.6381492614746094,
|
| 2638 |
+
"eval_runtime": 11.2842,
|
| 2639 |
+
"eval_samples_per_second": 50.956,
|
| 2640 |
+
"eval_steps_per_second": 12.761,
|
| 2641 |
"num_input_tokens_seen": 2182656,
|
| 2642 |
"step": 1600
|
| 2643 |
},
|
|
|
|
| 2964 |
{
|
| 2965 |
"epoch": 5.556414219474497,
|
| 2966 |
"eval_loss": 0.6330167055130005,
|
| 2967 |
+
"eval_runtime": 11.288,
|
| 2968 |
+
"eval_samples_per_second": 50.939,
|
| 2969 |
+
"eval_steps_per_second": 12.757,
|
| 2970 |
"num_input_tokens_seen": 2453904,
|
| 2971 |
"step": 1800
|
| 2972 |
},
|
|
|
|
| 3293 |
{
|
| 3294 |
"epoch": 6.1731066460587325,
|
| 3295 |
"eval_loss": 0.6232376098632812,
|
| 3296 |
+
"eval_runtime": 11.318,
|
| 3297 |
+
"eval_samples_per_second": 50.804,
|
| 3298 |
+
"eval_steps_per_second": 12.723,
|
| 3299 |
"num_input_tokens_seen": 2727984,
|
| 3300 |
"step": 2000
|
| 3301 |
},
|
|
|
|
| 3622 |
{
|
| 3623 |
"epoch": 6.7913446676970635,
|
| 3624 |
"eval_loss": 0.6167892813682556,
|
| 3625 |
+
"eval_runtime": 11.2955,
|
| 3626 |
+
"eval_samples_per_second": 50.905,
|
| 3627 |
+
"eval_steps_per_second": 12.748,
|
| 3628 |
"num_input_tokens_seen": 2999760,
|
| 3629 |
"step": 2200
|
| 3630 |
},
|
|
|
|
| 3951 |
{
|
| 3952 |
"epoch": 7.4080370942812985,
|
| 3953 |
"eval_loss": 0.5621501207351685,
|
| 3954 |
+
"eval_runtime": 11.2909,
|
| 3955 |
+
"eval_samples_per_second": 50.926,
|
| 3956 |
+
"eval_steps_per_second": 12.754,
|
| 3957 |
"num_input_tokens_seen": 3274528,
|
| 3958 |
"step": 2400
|
| 3959 |
},
|
|
|
|
| 4280 |
{
|
| 4281 |
"epoch": 8.024729520865533,
|
| 4282 |
"eval_loss": 0.5813793540000916,
|
| 4283 |
+
"eval_runtime": 11.2998,
|
| 4284 |
+
"eval_samples_per_second": 50.886,
|
| 4285 |
+
"eval_steps_per_second": 12.744,
|
| 4286 |
"num_input_tokens_seen": 3546880,
|
| 4287 |
"step": 2600
|
| 4288 |
},
|
|
|
|
| 4609 |
{
|
| 4610 |
"epoch": 8.642967542503865,
|
| 4611 |
"eval_loss": 0.5915025472640991,
|
| 4612 |
+
"eval_runtime": 11.2948,
|
| 4613 |
+
"eval_samples_per_second": 50.909,
|
| 4614 |
+
"eval_steps_per_second": 12.749,
|
| 4615 |
"num_input_tokens_seen": 3821184,
|
| 4616 |
"step": 2800
|
| 4617 |
},
|
|
|
|
| 4938 |
{
|
| 4939 |
"epoch": 9.2596599690881,
|
| 4940 |
"eval_loss": 0.5584082007408142,
|
| 4941 |
+
"eval_runtime": 11.2865,
|
| 4942 |
+
"eval_samples_per_second": 50.946,
|
| 4943 |
+
"eval_steps_per_second": 12.759,
|
| 4944 |
"num_input_tokens_seen": 4090704,
|
| 4945 |
"step": 3000
|
| 4946 |
},
|
|
|
|
| 5267 |
{
|
| 5268 |
"epoch": 9.87789799072643,
|
| 5269 |
"eval_loss": 0.562062680721283,
|
| 5270 |
+
"eval_runtime": 11.2889,
|
| 5271 |
+
"eval_samples_per_second": 50.935,
|
| 5272 |
+
"eval_steps_per_second": 12.756,
|
| 5273 |
"num_input_tokens_seen": 4363696,
|
| 5274 |
"step": 3200
|
| 5275 |
},
|
|
|
|
| 5596 |
{
|
| 5597 |
"epoch": 10.494590417310665,
|
| 5598 |
"eval_loss": 0.5493518114089966,
|
| 5599 |
+
"eval_runtime": 11.2948,
|
| 5600 |
+
"eval_samples_per_second": 50.909,
|
| 5601 |
+
"eval_steps_per_second": 12.749,
|
| 5602 |
"num_input_tokens_seen": 4636656,
|
| 5603 |
"step": 3400
|
| 5604 |
},
|
|
|
|
| 5925 |
{
|
| 5926 |
"epoch": 11.1112828438949,
|
| 5927 |
"eval_loss": 0.5832644701004028,
|
| 5928 |
+
"eval_runtime": 11.2861,
|
| 5929 |
+
"eval_samples_per_second": 50.948,
|
| 5930 |
+
"eval_steps_per_second": 12.759,
|
| 5931 |
"num_input_tokens_seen": 4908928,
|
| 5932 |
"step": 3600
|
| 5933 |
},
|
|
|
|
| 6254 |
{
|
| 6255 |
"epoch": 11.72952086553323,
|
| 6256 |
"eval_loss": 0.5668447017669678,
|
| 6257 |
+
"eval_runtime": 11.3543,
|
| 6258 |
+
"eval_samples_per_second": 50.641,
|
| 6259 |
+
"eval_steps_per_second": 12.682,
|
| 6260 |
"num_input_tokens_seen": 5179040,
|
| 6261 |
"step": 3800
|
| 6262 |
},
|
|
|
|
| 6583 |
{
|
| 6584 |
"epoch": 12.346213292117465,
|
| 6585 |
"eval_loss": 0.5749086737632751,
|
| 6586 |
+
"eval_runtime": 11.3124,
|
| 6587 |
+
"eval_samples_per_second": 50.829,
|
| 6588 |
+
"eval_steps_per_second": 12.729,
|
| 6589 |
"num_input_tokens_seen": 5452192,
|
| 6590 |
"step": 4000
|
| 6591 |
},
|
|
|
|
| 6912 |
{
|
| 6913 |
"epoch": 12.964451313755795,
|
| 6914 |
"eval_loss": 0.564673900604248,
|
| 6915 |
+
"eval_runtime": 11.2981,
|
| 6916 |
+
"eval_samples_per_second": 50.894,
|
| 6917 |
+
"eval_steps_per_second": 12.746,
|
| 6918 |
"num_input_tokens_seen": 5724448,
|
| 6919 |
"step": 4200
|
| 6920 |
},
|
|
|
|
| 7241 |
{
|
| 7242 |
"epoch": 13.58114374034003,
|
| 7243 |
"eval_loss": 0.557171642780304,
|
| 7244 |
+
"eval_runtime": 11.2883,
|
| 7245 |
+
"eval_samples_per_second": 50.938,
|
| 7246 |
+
"eval_steps_per_second": 12.757,
|
| 7247 |
"num_input_tokens_seen": 5998032,
|
| 7248 |
"step": 4400
|
| 7249 |
},
|
|
|
|
| 7570 |
{
|
| 7571 |
"epoch": 14.197836166924265,
|
| 7572 |
"eval_loss": 0.5686624050140381,
|
| 7573 |
+
"eval_runtime": 11.2932,
|
| 7574 |
+
"eval_samples_per_second": 50.916,
|
| 7575 |
+
"eval_steps_per_second": 12.751,
|
| 7576 |
"num_input_tokens_seen": 6269792,
|
| 7577 |
"step": 4600
|
| 7578 |
},
|
|
|
|
| 7899 |
{
|
| 7900 |
"epoch": 14.816074188562597,
|
| 7901 |
"eval_loss": 0.5626024603843689,
|
| 7902 |
+
"eval_runtime": 11.2851,
|
| 7903 |
+
"eval_samples_per_second": 50.952,
|
| 7904 |
+
"eval_steps_per_second": 12.76,
|
| 7905 |
"num_input_tokens_seen": 6541248,
|
| 7906 |
"step": 4800
|
| 7907 |
},
|
|
|
|
| 8228 |
{
|
| 8229 |
"epoch": 15.432766615146832,
|
| 8230 |
"eval_loss": 0.5851988196372986,
|
| 8231 |
+
"eval_runtime": 11.2997,
|
| 8232 |
+
"eval_samples_per_second": 50.886,
|
| 8233 |
+
"eval_steps_per_second": 12.744,
|
| 8234 |
"num_input_tokens_seen": 6815200,
|
| 8235 |
"step": 5000
|
| 8236 |
},
|
|
|
|
| 8557 |
{
|
| 8558 |
"epoch": 16.049459041731065,
|
| 8559 |
"eval_loss": 0.6189093589782715,
|
| 8560 |
+
"eval_runtime": 11.3118,
|
| 8561 |
+
"eval_samples_per_second": 50.832,
|
| 8562 |
+
"eval_steps_per_second": 12.73,
|
| 8563 |
"num_input_tokens_seen": 7086224,
|
| 8564 |
"step": 5200
|
| 8565 |
},
|
|
|
|
| 8886 |
{
|
| 8887 |
"epoch": 16.667697063369395,
|
| 8888 |
"eval_loss": 0.6122633814811707,
|
| 8889 |
+
"eval_runtime": 11.3022,
|
| 8890 |
+
"eval_samples_per_second": 50.875,
|
| 8891 |
+
"eval_steps_per_second": 12.741,
|
| 8892 |
"num_input_tokens_seen": 7360560,
|
| 8893 |
"step": 5400
|
| 8894 |
},
|
|
|
|
| 9215 |
{
|
| 9216 |
"epoch": 17.284389489953632,
|
| 9217 |
"eval_loss": 0.611182689666748,
|
| 9218 |
+
"eval_runtime": 11.2991,
|
| 9219 |
+
"eval_samples_per_second": 50.889,
|
| 9220 |
+
"eval_steps_per_second": 12.744,
|
| 9221 |
"num_input_tokens_seen": 7632240,
|
| 9222 |
"step": 5600
|
| 9223 |
},
|
|
|
|
| 9544 |
{
|
| 9545 |
"epoch": 17.902627511591962,
|
| 9546 |
"eval_loss": 0.5843232274055481,
|
| 9547 |
+
"eval_runtime": 11.2893,
|
| 9548 |
+
"eval_samples_per_second": 50.933,
|
| 9549 |
+
"eval_steps_per_second": 12.755,
|
| 9550 |
"num_input_tokens_seen": 7904432,
|
| 9551 |
"step": 5800
|
| 9552 |
},
|
|
|
|
| 9873 |
{
|
| 9874 |
"epoch": 18.5193199381762,
|
| 9875 |
"eval_loss": 0.6198561191558838,
|
| 9876 |
+
"eval_runtime": 11.3112,
|
| 9877 |
+
"eval_samples_per_second": 50.835,
|
| 9878 |
+
"eval_steps_per_second": 12.731,
|
| 9879 |
"num_input_tokens_seen": 8177168,
|
| 9880 |
"step": 6000
|
| 9881 |
},
|
|
|
|
| 10202 |
{
|
| 10203 |
"epoch": 19.136012364760433,
|
| 10204 |
"eval_loss": 0.6794010996818542,
|
| 10205 |
+
"eval_runtime": 11.2936,
|
| 10206 |
+
"eval_samples_per_second": 50.914,
|
| 10207 |
+
"eval_steps_per_second": 12.751,
|
| 10208 |
"num_input_tokens_seen": 8449968,
|
| 10209 |
"step": 6200
|
| 10210 |
},
|
|
|
|
| 10531 |
{
|
| 10532 |
"epoch": 19.754250386398763,
|
| 10533 |
"eval_loss": 0.6374606490135193,
|
| 10534 |
+
"eval_runtime": 11.2831,
|
| 10535 |
+
"eval_samples_per_second": 50.961,
|
| 10536 |
+
"eval_steps_per_second": 12.762,
|
| 10537 |
"num_input_tokens_seen": 8722992,
|
| 10538 |
"step": 6400
|
| 10539 |
},
|
|
|
|
| 10860 |
{
|
| 10861 |
"epoch": 20.370942812983,
|
| 10862 |
"eval_loss": 0.6705669164657593,
|
| 10863 |
+
"eval_runtime": 11.2994,
|
| 10864 |
+
"eval_samples_per_second": 50.888,
|
| 10865 |
+
"eval_steps_per_second": 12.744,
|
| 10866 |
"num_input_tokens_seen": 8996224,
|
| 10867 |
"step": 6600
|
| 10868 |
},
|
|
|
|
| 11189 |
{
|
| 11190 |
"epoch": 20.98918083462133,
|
| 11191 |
"eval_loss": 0.648054838180542,
|
| 11192 |
+
"eval_runtime": 11.2898,
|
| 11193 |
+
"eval_samples_per_second": 50.931,
|
| 11194 |
+
"eval_steps_per_second": 12.755,
|
| 11195 |
"num_input_tokens_seen": 9269504,
|
| 11196 |
"step": 6800
|
| 11197 |
},
|
|
|
|
| 11518 |
{
|
| 11519 |
"epoch": 21.605873261205563,
|
| 11520 |
"eval_loss": 0.7299332618713379,
|
| 11521 |
+
"eval_runtime": 11.3095,
|
| 11522 |
+
"eval_samples_per_second": 50.842,
|
| 11523 |
+
"eval_steps_per_second": 12.733,
|
| 11524 |
"num_input_tokens_seen": 9542432,
|
| 11525 |
"step": 7000
|
| 11526 |
},
|
|
|
|
| 11847 |
{
|
| 11848 |
"epoch": 22.2225656877898,
|
| 11849 |
"eval_loss": 0.7840644717216492,
|
| 11850 |
+
"eval_runtime": 11.3076,
|
| 11851 |
+
"eval_samples_per_second": 50.851,
|
| 11852 |
+
"eval_steps_per_second": 12.735,
|
| 11853 |
"num_input_tokens_seen": 9812704,
|
| 11854 |
"step": 7200
|
| 11855 |
},
|
|
|
|
| 12176 |
{
|
| 12177 |
"epoch": 22.84080370942813,
|
| 12178 |
"eval_loss": 0.7381678819656372,
|
| 12179 |
+
"eval_runtime": 11.3001,
|
| 12180 |
+
"eval_samples_per_second": 50.885,
|
| 12181 |
+
"eval_steps_per_second": 12.743,
|
| 12182 |
"num_input_tokens_seen": 10086272,
|
| 12183 |
"step": 7400
|
| 12184 |
},
|
|
|
|
| 12505 |
{
|
| 12506 |
"epoch": 23.457496136012363,
|
| 12507 |
"eval_loss": 0.7728149890899658,
|
| 12508 |
+
"eval_runtime": 11.2886,
|
| 12509 |
+
"eval_samples_per_second": 50.936,
|
| 12510 |
+
"eval_steps_per_second": 12.756,
|
| 12511 |
"num_input_tokens_seen": 10358832,
|
| 12512 |
"step": 7600
|
| 12513 |
},
|
|
|
|
| 12834 |
{
|
| 12835 |
"epoch": 24.0741885625966,
|
| 12836 |
"eval_loss": 0.8268849849700928,
|
| 12837 |
+
"eval_runtime": 11.2998,
|
| 12838 |
+
"eval_samples_per_second": 50.886,
|
| 12839 |
+
"eval_steps_per_second": 12.744,
|
| 12840 |
"num_input_tokens_seen": 10630000,
|
| 12841 |
"step": 7800
|
| 12842 |
},
|
|
|
|
| 13163 |
{
|
| 13164 |
"epoch": 24.69242658423493,
|
| 13165 |
"eval_loss": 0.8175145983695984,
|
| 13166 |
+
"eval_runtime": 11.2902,
|
| 13167 |
+
"eval_samples_per_second": 50.929,
|
| 13168 |
+
"eval_steps_per_second": 12.754,
|
| 13169 |
"num_input_tokens_seen": 10904880,
|
| 13170 |
"step": 8000
|
| 13171 |
},
|
|
|
|
| 13492 |
{
|
| 13493 |
"epoch": 25.309119010819167,
|
| 13494 |
"eval_loss": 0.8719689249992371,
|
| 13495 |
+
"eval_runtime": 11.2821,
|
| 13496 |
+
"eval_samples_per_second": 50.965,
|
| 13497 |
+
"eval_steps_per_second": 12.764,
|
| 13498 |
"num_input_tokens_seen": 11176208,
|
| 13499 |
"step": 8200
|
| 13500 |
},
|
|
|
|
| 13821 |
{
|
| 13822 |
"epoch": 25.927357032457497,
|
| 13823 |
"eval_loss": 0.9041878581047058,
|
| 13824 |
+
"eval_runtime": 11.2871,
|
| 13825 |
+
"eval_samples_per_second": 50.943,
|
| 13826 |
+
"eval_steps_per_second": 12.758,
|
| 13827 |
"num_input_tokens_seen": 11451344,
|
| 13828 |
"step": 8400
|
| 13829 |
},
|
|
|
|
| 14150 |
{
|
| 14151 |
"epoch": 26.54404945904173,
|
| 14152 |
"eval_loss": 0.8620166778564453,
|
| 14153 |
+
"eval_runtime": 11.3068,
|
| 14154 |
+
"eval_samples_per_second": 50.854,
|
| 14155 |
+
"eval_steps_per_second": 12.736,
|
| 14156 |
"num_input_tokens_seen": 11723328,
|
| 14157 |
"step": 8600
|
| 14158 |
},
|
|
|
|
| 14479 |
{
|
| 14480 |
"epoch": 27.160741885625967,
|
| 14481 |
"eval_loss": 0.9756768345832825,
|
| 14482 |
+
"eval_runtime": 11.2984,
|
| 14483 |
+
"eval_samples_per_second": 50.892,
|
| 14484 |
+
"eval_steps_per_second": 12.745,
|
| 14485 |
"num_input_tokens_seen": 11996224,
|
| 14486 |
"step": 8800
|
| 14487 |
},
|
|
|
|
| 14808 |
{
|
| 14809 |
"epoch": 27.778979907264297,
|
| 14810 |
"eval_loss": 0.9385554194450378,
|
| 14811 |
+
"eval_runtime": 11.3054,
|
| 14812 |
+
"eval_samples_per_second": 50.86,
|
| 14813 |
+
"eval_steps_per_second": 12.737,
|
| 14814 |
"num_input_tokens_seen": 12267520,
|
| 14815 |
"step": 9000
|
| 14816 |
},
|
|
|
|
| 15137 |
{
|
| 15138 |
"epoch": 28.39567233384853,
|
| 15139 |
"eval_loss": 0.9237757921218872,
|
| 15140 |
+
"eval_runtime": 11.3005,
|
| 15141 |
+
"eval_samples_per_second": 50.883,
|
| 15142 |
+
"eval_steps_per_second": 12.743,
|
| 15143 |
"num_input_tokens_seen": 12542064,
|
| 15144 |
"step": 9200
|
| 15145 |
},
|
|
|
|
| 15466 |
{
|
| 15467 |
"epoch": 29.012364760432767,
|
| 15468 |
"eval_loss": 1.064571499824524,
|
| 15469 |
+
"eval_runtime": 11.2975,
|
| 15470 |
+
"eval_samples_per_second": 50.896,
|
| 15471 |
+
"eval_steps_per_second": 12.746,
|
| 15472 |
"num_input_tokens_seen": 12812048,
|
| 15473 |
"step": 9400
|
| 15474 |
},
|
|
|
|
| 15795 |
{
|
| 15796 |
"epoch": 29.630602782071097,
|
| 15797 |
"eval_loss": 1.0749653577804565,
|
| 15798 |
+
"eval_runtime": 11.2962,
|
| 15799 |
+
"eval_samples_per_second": 50.902,
|
| 15800 |
+
"eval_steps_per_second": 12.748,
|
| 15801 |
"num_input_tokens_seen": 13085264,
|
| 15802 |
"step": 9600
|
| 15803 |
},
|
|
|
|
| 16124 |
{
|
| 16125 |
"epoch": 30.24729520865533,
|
| 16126 |
"eval_loss": 1.0077648162841797,
|
| 16127 |
+
"eval_runtime": 11.3049,
|
| 16128 |
+
"eval_samples_per_second": 50.863,
|
| 16129 |
+
"eval_steps_per_second": 12.738,
|
| 16130 |
"num_input_tokens_seen": 13356384,
|
| 16131 |
"step": 9800
|
| 16132 |
},
|
|
|
|
| 16453 |
{
|
| 16454 |
"epoch": 30.865533230293664,
|
| 16455 |
"eval_loss": 1.057982325553894,
|
| 16456 |
+
"eval_runtime": 11.3018,
|
| 16457 |
+
"eval_samples_per_second": 50.877,
|
| 16458 |
+
"eval_steps_per_second": 12.741,
|
| 16459 |
"num_input_tokens_seen": 13629216,
|
| 16460 |
"step": 10000
|
| 16461 |
},
|
|
|
|
| 16782 |
{
|
| 16783 |
"epoch": 31.482225656877898,
|
| 16784 |
"eval_loss": 1.0450738668441772,
|
| 16785 |
+
"eval_runtime": 11.2984,
|
| 16786 |
+
"eval_samples_per_second": 50.892,
|
| 16787 |
+
"eval_steps_per_second": 12.745,
|
| 16788 |
"num_input_tokens_seen": 13902736,
|
| 16789 |
"step": 10200
|
| 16790 |
},
|
|
|
|
| 17111 |
{
|
| 17112 |
"epoch": 32.09891808346213,
|
| 17113 |
"eval_loss": 1.0477303266525269,
|
| 17114 |
+
"eval_runtime": 11.2927,
|
| 17115 |
+
"eval_samples_per_second": 50.918,
|
| 17116 |
+
"eval_steps_per_second": 12.752,
|
| 17117 |
"num_input_tokens_seen": 14174192,
|
| 17118 |
"step": 10400
|
| 17119 |
},
|
|
|
|
| 17440 |
{
|
| 17441 |
"epoch": 32.717156105100464,
|
| 17442 |
"eval_loss": 1.14347243309021,
|
| 17443 |
+
"eval_runtime": 11.3094,
|
| 17444 |
+
"eval_samples_per_second": 50.843,
|
| 17445 |
+
"eval_steps_per_second": 12.733,
|
| 17446 |
"num_input_tokens_seen": 14448176,
|
| 17447 |
"step": 10600
|
| 17448 |
},
|
|
|
|
| 17769 |
{
|
| 17770 |
"epoch": 33.3338485316847,
|
| 17771 |
"eval_loss": 1.0730254650115967,
|
| 17772 |
+
"eval_runtime": 11.3076,
|
| 17773 |
+
"eval_samples_per_second": 50.851,
|
| 17774 |
+
"eval_steps_per_second": 12.735,
|
| 17775 |
"num_input_tokens_seen": 14718096,
|
| 17776 |
"step": 10800
|
| 17777 |
},
|
|
|
|
| 18098 |
{
|
| 18099 |
"epoch": 33.95208655332303,
|
| 18100 |
"eval_loss": 1.0351147651672363,
|
| 18101 |
+
"eval_runtime": 11.2934,
|
| 18102 |
+
"eval_samples_per_second": 50.915,
|
| 18103 |
+
"eval_steps_per_second": 12.751,
|
| 18104 |
"num_input_tokens_seen": 14992048,
|
| 18105 |
"step": 11000
|
| 18106 |
},
|
|
|
|
| 18427 |
{
|
| 18428 |
"epoch": 34.568778979907265,
|
| 18429 |
"eval_loss": 1.1394553184509277,
|
| 18430 |
+
"eval_runtime": 11.3011,
|
| 18431 |
+
"eval_samples_per_second": 50.88,
|
| 18432 |
+
"eval_steps_per_second": 12.742,
|
| 18433 |
"num_input_tokens_seen": 15265072,
|
| 18434 |
"step": 11200
|
| 18435 |
},
|
|
|
|
| 18756 |
{
|
| 18757 |
"epoch": 35.1854714064915,
|
| 18758 |
"eval_loss": 1.1201566457748413,
|
| 18759 |
+
"eval_runtime": 11.2937,
|
| 18760 |
+
"eval_samples_per_second": 50.913,
|
| 18761 |
+
"eval_steps_per_second": 12.75,
|
| 18762 |
"num_input_tokens_seen": 15538960,
|
| 18763 |
"step": 11400
|
| 18764 |
},
|
|
|
|
| 19085 |
{
|
| 19086 |
"epoch": 35.80370942812983,
|
| 19087 |
"eval_loss": 1.133685827255249,
|
| 19088 |
+
"eval_runtime": 11.2974,
|
| 19089 |
+
"eval_samples_per_second": 50.897,
|
| 19090 |
+
"eval_steps_per_second": 12.746,
|
| 19091 |
"num_input_tokens_seen": 15812880,
|
| 19092 |
"step": 11600
|
| 19093 |
},
|
|
|
|
| 19414 |
{
|
| 19415 |
"epoch": 36.420401854714065,
|
| 19416 |
"eval_loss": 1.1776589155197144,
|
| 19417 |
+
"eval_runtime": 11.3188,
|
| 19418 |
+
"eval_samples_per_second": 50.8,
|
| 19419 |
+
"eval_steps_per_second": 12.722,
|
| 19420 |
"num_input_tokens_seen": 16082608,
|
| 19421 |
"step": 11800
|
| 19422 |
},
|
|
|
|
| 19743 |
{
|
| 19744 |
"epoch": 37.0370942812983,
|
| 19745 |
"eval_loss": 1.18972909450531,
|
| 19746 |
+
"eval_runtime": 11.3021,
|
| 19747 |
+
"eval_samples_per_second": 50.875,
|
| 19748 |
+
"eval_steps_per_second": 12.741,
|
| 19749 |
"num_input_tokens_seen": 16357888,
|
| 19750 |
"step": 12000
|
| 19751 |
},
|
|
|
|
| 20072 |
{
|
| 20073 |
"epoch": 37.65533230293663,
|
| 20074 |
"eval_loss": 1.2221449613571167,
|
| 20075 |
+
"eval_runtime": 11.2926,
|
| 20076 |
+
"eval_samples_per_second": 50.918,
|
| 20077 |
+
"eval_steps_per_second": 12.752,
|
| 20078 |
"num_input_tokens_seen": 16627872,
|
| 20079 |
"step": 12200
|
| 20080 |
},
|
|
|
|
| 20401 |
{
|
| 20402 |
"epoch": 38.272024729520865,
|
| 20403 |
"eval_loss": 1.169758677482605,
|
| 20404 |
+
"eval_runtime": 11.2954,
|
| 20405 |
+
"eval_samples_per_second": 50.906,
|
| 20406 |
+
"eval_steps_per_second": 12.749,
|
| 20407 |
"num_input_tokens_seen": 16900336,
|
| 20408 |
"step": 12400
|
| 20409 |
},
|
|
|
|
| 20730 |
{
|
| 20731 |
"epoch": 38.8902627511592,
|
| 20732 |
"eval_loss": 1.1674479246139526,
|
| 20733 |
+
"eval_runtime": 11.3053,
|
| 20734 |
+
"eval_samples_per_second": 50.861,
|
| 20735 |
+
"eval_steps_per_second": 12.737,
|
| 20736 |
"num_input_tokens_seen": 17175024,
|
| 20737 |
"step": 12600
|
| 20738 |
},
|
|
|
|
| 21059 |
{
|
| 21060 |
"epoch": 39.50695517774343,
|
| 21061 |
"eval_loss": 1.1664071083068848,
|
| 21062 |
+
"eval_runtime": 11.2986,
|
| 21063 |
+
"eval_samples_per_second": 50.891,
|
| 21064 |
+
"eval_steps_per_second": 12.745,
|
| 21065 |
"num_input_tokens_seen": 17446864,
|
| 21066 |
"step": 12800
|
| 21067 |
},
|
|
|
|
| 21388 |
{
|
| 21389 |
"epoch": 40.123647604327665,
|
| 21390 |
"eval_loss": 1.2493196725845337,
|
| 21391 |
+
"eval_runtime": 11.2977,
|
| 21392 |
+
"eval_samples_per_second": 50.895,
|
| 21393 |
+
"eval_steps_per_second": 12.746,
|
| 21394 |
"num_input_tokens_seen": 17716560,
|
| 21395 |
"step": 13000
|
| 21396 |
},
|
|
|
|
| 21717 |
{
|
| 21718 |
"epoch": 40.741885625966,
|
| 21719 |
"eval_loss": 1.3186978101730347,
|
| 21720 |
+
"eval_runtime": 11.3068,
|
| 21721 |
+
"eval_samples_per_second": 50.855,
|
| 21722 |
+
"eval_steps_per_second": 12.736,
|
| 21723 |
"num_input_tokens_seen": 17991792,
|
| 21724 |
"step": 13200
|
| 21725 |
},
|
|
|
|
| 22046 |
{
|
| 22047 |
"epoch": 41.35857805255023,
|
| 22048 |
"eval_loss": 1.256793737411499,
|
| 22049 |
+
"eval_runtime": 11.299,
|
| 22050 |
+
"eval_samples_per_second": 50.889,
|
| 22051 |
+
"eval_steps_per_second": 12.744,
|
| 22052 |
"num_input_tokens_seen": 18262992,
|
| 22053 |
"step": 13400
|
| 22054 |
},
|
|
|
|
| 22375 |
{
|
| 22376 |
"epoch": 41.97681607418856,
|
| 22377 |
"eval_loss": 1.2447845935821533,
|
| 22378 |
+
"eval_runtime": 11.3003,
|
| 22379 |
+
"eval_samples_per_second": 50.884,
|
| 22380 |
+
"eval_steps_per_second": 12.743,
|
| 22381 |
"num_input_tokens_seen": 18536880,
|
| 22382 |
"step": 13600
|
| 22383 |
},
|
|
|
|
| 22704 |
{
|
| 22705 |
"epoch": 42.5935085007728,
|
| 22706 |
"eval_loss": 1.233655333518982,
|
| 22707 |
+
"eval_runtime": 11.2947,
|
| 22708 |
+
"eval_samples_per_second": 50.909,
|
| 22709 |
+
"eval_steps_per_second": 12.749,
|
| 22710 |
"num_input_tokens_seen": 18806784,
|
| 22711 |
"step": 13800
|
| 22712 |
},
|
|
|
|
| 23033 |
{
|
| 23034 |
"epoch": 43.210200927357036,
|
| 23035 |
"eval_loss": 1.254447102546692,
|
| 23036 |
+
"eval_runtime": 11.3401,
|
| 23037 |
+
"eval_samples_per_second": 50.705,
|
| 23038 |
+
"eval_steps_per_second": 12.698,
|
| 23039 |
"num_input_tokens_seen": 19080608,
|
| 23040 |
"step": 14000
|
| 23041 |
},
|
|
|
|
| 23362 |
{
|
| 23363 |
"epoch": 43.82843894899536,
|
| 23364 |
"eval_loss": 1.3475619554519653,
|
| 23365 |
+
"eval_runtime": 11.3306,
|
| 23366 |
+
"eval_samples_per_second": 50.748,
|
| 23367 |
+
"eval_steps_per_second": 12.709,
|
| 23368 |
"num_input_tokens_seen": 19352320,
|
| 23369 |
"step": 14200
|
| 23370 |
},
|
|
|
|
| 23691 |
{
|
| 23692 |
"epoch": 44.4451313755796,
|
| 23693 |
"eval_loss": 1.2956358194351196,
|
| 23694 |
+
"eval_runtime": 11.3002,
|
| 23695 |
+
"eval_samples_per_second": 50.884,
|
| 23696 |
+
"eval_steps_per_second": 12.743,
|
| 23697 |
"num_input_tokens_seen": 19624544,
|
| 23698 |
"step": 14400
|
| 23699 |
},
|
|
|
|
| 24020 |
{
|
| 24021 |
"epoch": 45.061823802163836,
|
| 24022 |
"eval_loss": 1.214294195175171,
|
| 24023 |
+
"eval_runtime": 11.3055,
|
| 24024 |
+
"eval_samples_per_second": 50.86,
|
| 24025 |
+
"eval_steps_per_second": 12.737,
|
| 24026 |
"num_input_tokens_seen": 19896064,
|
| 24027 |
"step": 14600
|
| 24028 |
},
|
|
|
|
| 24349 |
{
|
| 24350 |
"epoch": 45.68006182380216,
|
| 24351 |
"eval_loss": 1.200486660003662,
|
| 24352 |
+
"eval_runtime": 11.2896,
|
| 24353 |
+
"eval_samples_per_second": 50.932,
|
| 24354 |
+
"eval_steps_per_second": 12.755,
|
| 24355 |
"num_input_tokens_seen": 20168064,
|
| 24356 |
"step": 14800
|
| 24357 |
},
|
|
|
|
| 24678 |
{
|
| 24679 |
"epoch": 46.2967542503864,
|
| 24680 |
"eval_loss": 1.3230748176574707,
|
| 24681 |
+
"eval_runtime": 11.2946,
|
| 24682 |
+
"eval_samples_per_second": 50.909,
|
| 24683 |
+
"eval_steps_per_second": 12.749,
|
| 24684 |
"num_input_tokens_seen": 20440208,
|
| 24685 |
"step": 15000
|
| 24686 |
},
|
|
|
|
| 25007 |
{
|
| 25008 |
"epoch": 46.914992272024726,
|
| 25009 |
"eval_loss": 1.2638696432113647,
|
| 25010 |
+
"eval_runtime": 11.3107,
|
| 25011 |
+
"eval_samples_per_second": 50.837,
|
| 25012 |
+
"eval_steps_per_second": 12.731,
|
| 25013 |
"num_input_tokens_seen": 20713296,
|
| 25014 |
"step": 15200
|
| 25015 |
},
|
|
|
|
| 25336 |
{
|
| 25337 |
"epoch": 47.53168469860896,
|
| 25338 |
"eval_loss": 1.3379755020141602,
|
| 25339 |
+
"eval_runtime": 11.306,
|
| 25340 |
+
"eval_samples_per_second": 50.858,
|
| 25341 |
+
"eval_steps_per_second": 12.737,
|
| 25342 |
"num_input_tokens_seen": 20985744,
|
| 25343 |
"step": 15400
|
| 25344 |
},
|
|
|
|
| 25665 |
{
|
| 25666 |
"epoch": 48.1483771251932,
|
| 25667 |
"eval_loss": 1.2503776550292969,
|
| 25668 |
+
"eval_runtime": 11.2966,
|
| 25669 |
+
"eval_samples_per_second": 50.9,
|
| 25670 |
+
"eval_steps_per_second": 12.747,
|
| 25671 |
"num_input_tokens_seen": 21257920,
|
| 25672 |
"step": 15600
|
| 25673 |
},
|
|
|
|
| 25994 |
{
|
| 25995 |
"epoch": 48.76661514683153,
|
| 25996 |
"eval_loss": 1.2862586975097656,
|
| 25997 |
+
"eval_runtime": 11.3031,
|
| 25998 |
+
"eval_samples_per_second": 50.871,
|
| 25999 |
+
"eval_steps_per_second": 12.74,
|
| 26000 |
"num_input_tokens_seen": 21529248,
|
| 26001 |
"step": 15800
|
| 26002 |
},
|
|
|
|
| 26323 |
{
|
| 26324 |
"epoch": 49.38330757341576,
|
| 26325 |
"eval_loss": 1.312309741973877,
|
| 26326 |
+
"eval_runtime": 11.2944,
|
| 26327 |
+
"eval_samples_per_second": 50.91,
|
| 26328 |
+
"eval_steps_per_second": 12.75,
|
| 26329 |
"num_input_tokens_seen": 21800992,
|
| 26330 |
"step": 16000
|
| 26331 |
},
|
|
|
|
| 26652 |
{
|
| 26653 |
"epoch": 50.0,
|
| 26654 |
"eval_loss": 1.2966762781143188,
|
| 26655 |
+
"eval_runtime": 11.2763,
|
| 26656 |
+
"eval_samples_per_second": 50.992,
|
| 26657 |
+
"eval_steps_per_second": 12.77,
|
| 26658 |
"num_input_tokens_seen": 22073392,
|
| 26659 |
"step": 16200
|
| 26660 |
},
|
|
|
|
| 26981 |
{
|
| 26982 |
"epoch": 50.618238021638334,
|
| 26983 |
"eval_loss": 1.3632538318634033,
|
| 26984 |
+
"eval_runtime": 11.3003,
|
| 26985 |
+
"eval_samples_per_second": 50.884,
|
| 26986 |
+
"eval_steps_per_second": 12.743,
|
| 26987 |
"num_input_tokens_seen": 22345648,
|
| 26988 |
"step": 16400
|
| 26989 |
},
|
|
|
|
| 27310 |
{
|
| 27311 |
"epoch": 51.23493044822256,
|
| 27312 |
"eval_loss": 1.3670175075531006,
|
| 27313 |
+
"eval_runtime": 11.296,
|
| 27314 |
+
"eval_samples_per_second": 50.903,
|
| 27315 |
+
"eval_steps_per_second": 12.748,
|
| 27316 |
"num_input_tokens_seen": 22617984,
|
| 27317 |
"step": 16600
|
| 27318 |
},
|
|
|
|
| 27639 |
{
|
| 27640 |
"epoch": 51.8531684698609,
|
| 27641 |
"eval_loss": 1.3320527076721191,
|
| 27642 |
+
"eval_runtime": 11.3009,
|
| 27643 |
+
"eval_samples_per_second": 50.881,
|
| 27644 |
+
"eval_steps_per_second": 12.742,
|
| 27645 |
"num_input_tokens_seen": 22892544,
|
| 27646 |
"step": 16800
|
| 27647 |
},
|
|
|
|
| 27968 |
{
|
| 27969 |
"epoch": 52.469860896445134,
|
| 27970 |
"eval_loss": 1.430206537246704,
|
| 27971 |
+
"eval_runtime": 11.3085,
|
| 27972 |
+
"eval_samples_per_second": 50.847,
|
| 27973 |
+
"eval_steps_per_second": 12.734,
|
| 27974 |
"num_input_tokens_seen": 23163488,
|
| 27975 |
"step": 17000
|
| 27976 |
},
|
|
|
|
| 28297 |
{
|
| 28298 |
"epoch": 53.086553323029364,
|
| 28299 |
"eval_loss": 1.321289300918579,
|
| 28300 |
+
"eval_runtime": 11.3024,
|
| 28301 |
+
"eval_samples_per_second": 50.874,
|
| 28302 |
+
"eval_steps_per_second": 12.741,
|
| 28303 |
"num_input_tokens_seen": 23438320,
|
| 28304 |
"step": 17200
|
| 28305 |
},
|
|
|
|
| 28626 |
{
|
| 28627 |
"epoch": 53.7047913446677,
|
| 28628 |
"eval_loss": 1.4570552110671997,
|
| 28629 |
+
"eval_runtime": 11.3064,
|
| 28630 |
+
"eval_samples_per_second": 50.856,
|
| 28631 |
+
"eval_steps_per_second": 12.736,
|
| 28632 |
"num_input_tokens_seen": 23708720,
|
| 28633 |
"step": 17400
|
| 28634 |
},
|
|
|
|
| 28955 |
{
|
| 28956 |
"epoch": 54.321483771251934,
|
| 28957 |
"eval_loss": 1.3873727321624756,
|
| 28958 |
+
"eval_runtime": 11.2922,
|
| 28959 |
+
"eval_samples_per_second": 50.92,
|
| 28960 |
+
"eval_steps_per_second": 12.752,
|
| 28961 |
"num_input_tokens_seen": 23984304,
|
| 28962 |
"step": 17600
|
| 28963 |
},
|
|
|
|
| 29284 |
{
|
| 29285 |
"epoch": 54.93972179289026,
|
| 29286 |
"eval_loss": 1.4202662706375122,
|
| 29287 |
+
"eval_runtime": 11.3232,
|
| 29288 |
+
"eval_samples_per_second": 50.781,
|
| 29289 |
+
"eval_steps_per_second": 12.717,
|
| 29290 |
"num_input_tokens_seen": 24256368,
|
| 29291 |
"step": 17800
|
| 29292 |
},
|
|
|
|
| 29613 |
{
|
| 29614 |
"epoch": 55.5564142194745,
|
| 29615 |
"eval_loss": 1.3830780982971191,
|
| 29616 |
+
"eval_runtime": 11.3013,
|
| 29617 |
+
"eval_samples_per_second": 50.879,
|
| 29618 |
+
"eval_steps_per_second": 12.742,
|
| 29619 |
"num_input_tokens_seen": 24527040,
|
| 29620 |
"step": 18000
|
| 29621 |
},
|
|
|
|
| 29942 |
{
|
| 29943 |
"epoch": 56.173106646058734,
|
| 29944 |
"eval_loss": 1.4858934879302979,
|
| 29945 |
+
"eval_runtime": 11.2884,
|
| 29946 |
+
"eval_samples_per_second": 50.937,
|
| 29947 |
+
"eval_steps_per_second": 12.756,
|
| 29948 |
"num_input_tokens_seen": 24799312,
|
| 29949 |
"step": 18200
|
| 29950 |
},
|
|
|
|
| 30271 |
{
|
| 30272 |
"epoch": 56.79134466769706,
|
| 30273 |
"eval_loss": 1.5054408311843872,
|
| 30274 |
+
"eval_runtime": 11.2984,
|
| 30275 |
+
"eval_samples_per_second": 50.892,
|
| 30276 |
+
"eval_steps_per_second": 12.745,
|
| 30277 |
"num_input_tokens_seen": 25072848,
|
| 30278 |
"step": 18400
|
| 30279 |
},
|
|
|
|
| 30600 |
{
|
| 30601 |
"epoch": 57.4080370942813,
|
| 30602 |
"eval_loss": 1.473268747329712,
|
| 30603 |
+
"eval_runtime": 11.3001,
|
| 30604 |
+
"eval_samples_per_second": 50.884,
|
| 30605 |
+
"eval_steps_per_second": 12.743,
|
| 30606 |
"num_input_tokens_seen": 25347056,
|
| 30607 |
"step": 18600
|
| 30608 |
},
|
|
|
|
| 30929 |
{
|
| 30930 |
"epoch": 58.024729520865534,
|
| 30931 |
"eval_loss": 1.5095571279525757,
|
| 30932 |
+
"eval_runtime": 11.2966,
|
| 30933 |
+
"eval_samples_per_second": 50.9,
|
| 30934 |
+
"eval_steps_per_second": 12.747,
|
| 30935 |
"num_input_tokens_seen": 25618400,
|
| 30936 |
"step": 18800
|
| 30937 |
},
|
|
|
|
| 31258 |
{
|
| 31259 |
"epoch": 58.64296754250386,
|
| 31260 |
"eval_loss": 1.3225481510162354,
|
| 31261 |
+
"eval_runtime": 11.3183,
|
| 31262 |
+
"eval_samples_per_second": 50.803,
|
| 31263 |
+
"eval_steps_per_second": 12.723,
|
| 31264 |
"num_input_tokens_seen": 25892960,
|
| 31265 |
"step": 19000
|
| 31266 |
},
|
|
|
|
| 31587 |
{
|
| 31588 |
"epoch": 59.2596599690881,
|
| 31589 |
"eval_loss": 1.4172106981277466,
|
| 31590 |
+
"eval_runtime": 11.3028,
|
| 31591 |
+
"eval_samples_per_second": 50.872,
|
| 31592 |
+
"eval_steps_per_second": 12.74,
|
| 31593 |
"num_input_tokens_seen": 26164688,
|
| 31594 |
"step": 19200
|
| 31595 |
},
|
|
|
|
| 31916 |
{
|
| 31917 |
"epoch": 59.87789799072643,
|
| 31918 |
"eval_loss": 1.35789155960083,
|
| 31919 |
+
"eval_runtime": 11.2938,
|
| 31920 |
+
"eval_samples_per_second": 50.913,
|
| 31921 |
+
"eval_steps_per_second": 12.75,
|
| 31922 |
"num_input_tokens_seen": 26437392,
|
| 31923 |
"step": 19400
|
| 31924 |
},
|
|
|
|
| 32245 |
{
|
| 32246 |
"epoch": 60.49459041731066,
|
| 32247 |
"eval_loss": 1.406263828277588,
|
| 32248 |
+
"eval_runtime": 11.2941,
|
| 32249 |
+
"eval_samples_per_second": 50.911,
|
| 32250 |
+
"eval_steps_per_second": 12.75,
|
| 32251 |
"num_input_tokens_seen": 26710176,
|
| 32252 |
"step": 19600
|
| 32253 |
},
|
|
|
|
| 32574 |
{
|
| 32575 |
"epoch": 61.1112828438949,
|
| 32576 |
"eval_loss": 1.4265893697738647,
|
| 32577 |
+
"eval_runtime": 11.3038,
|
| 32578 |
+
"eval_samples_per_second": 50.868,
|
| 32579 |
+
"eval_steps_per_second": 12.739,
|
| 32580 |
"num_input_tokens_seen": 26981728,
|
| 32581 |
"step": 19800
|
| 32582 |
},
|
|
|
|
| 32903 |
{
|
| 32904 |
"epoch": 61.72952086553323,
|
| 32905 |
"eval_loss": 1.3551362752914429,
|
| 32906 |
+
"eval_runtime": 11.2955,
|
| 32907 |
+
"eval_samples_per_second": 50.905,
|
| 32908 |
+
"eval_steps_per_second": 12.748,
|
| 32909 |
"num_input_tokens_seen": 27253632,
|
| 32910 |
"step": 20000
|
| 32911 |
},
|
|
|
|
| 33232 |
{
|
| 33233 |
"epoch": 62.34621329211747,
|
| 33234 |
"eval_loss": 1.4743679761886597,
|
| 33235 |
+
"eval_runtime": 11.2915,
|
| 33236 |
+
"eval_samples_per_second": 50.923,
|
| 33237 |
+
"eval_steps_per_second": 12.753,
|
| 33238 |
"num_input_tokens_seen": 27524928,
|
| 33239 |
"step": 20200
|
| 33240 |
},
|
|
|
|
| 33561 |
{
|
| 33562 |
"epoch": 62.964451313755795,
|
| 33563 |
"eval_loss": 1.5115978717803955,
|
| 33564 |
+
"eval_runtime": 11.314,
|
| 33565 |
+
"eval_samples_per_second": 50.822,
|
| 33566 |
+
"eval_steps_per_second": 12.728,
|
| 33567 |
"num_input_tokens_seen": 27799712,
|
| 33568 |
"step": 20400
|
| 33569 |
},
|
|
|
|
| 33890 |
{
|
| 33891 |
"epoch": 63.58114374034003,
|
| 33892 |
"eval_loss": 1.5977118015289307,
|
| 33893 |
+
"eval_runtime": 11.3135,
|
| 33894 |
+
"eval_samples_per_second": 50.824,
|
| 33895 |
+
"eval_steps_per_second": 12.728,
|
| 33896 |
"num_input_tokens_seen": 28071024,
|
| 33897 |
"step": 20600
|
| 33898 |
},
|
|
|
|
| 34219 |
{
|
| 34220 |
"epoch": 64.19783616692426,
|
| 34221 |
"eval_loss": 1.5763089656829834,
|
| 34222 |
+
"eval_runtime": 11.2999,
|
| 34223 |
+
"eval_samples_per_second": 50.885,
|
| 34224 |
+
"eval_steps_per_second": 12.743,
|
| 34225 |
"num_input_tokens_seen": 28342880,
|
| 34226 |
"step": 20800
|
| 34227 |
},
|
|
|
|
| 34548 |
{
|
| 34549 |
"epoch": 64.8160741885626,
|
| 34550 |
"eval_loss": 1.6289054155349731,
|
| 34551 |
+
"eval_runtime": 11.3101,
|
| 34552 |
+
"eval_samples_per_second": 50.839,
|
| 34553 |
+
"eval_steps_per_second": 12.732,
|
| 34554 |
"num_input_tokens_seen": 28617696,
|
| 34555 |
"step": 21000
|
| 34556 |
},
|
|
|
|
| 34877 |
{
|
| 34878 |
"epoch": 65.43276661514683,
|
| 34879 |
"eval_loss": 1.6688075065612793,
|
| 34880 |
+
"eval_runtime": 11.304,
|
| 34881 |
+
"eval_samples_per_second": 50.867,
|
| 34882 |
+
"eval_steps_per_second": 12.739,
|
| 34883 |
"num_input_tokens_seen": 28888112,
|
| 34884 |
"step": 21200
|
| 34885 |
},
|
|
|
|
| 35206 |
{
|
| 35207 |
"epoch": 66.04945904173107,
|
| 35208 |
"eval_loss": 1.6155662536621094,
|
| 35209 |
+
"eval_runtime": 11.3029,
|
| 35210 |
+
"eval_samples_per_second": 50.872,
|
| 35211 |
+
"eval_steps_per_second": 12.74,
|
| 35212 |
"num_input_tokens_seen": 29162944,
|
| 35213 |
"step": 21400
|
| 35214 |
},
|
|
|
|
| 35535 |
{
|
| 35536 |
"epoch": 66.6676970633694,
|
| 35537 |
"eval_loss": 1.6828913688659668,
|
| 35538 |
+
"eval_runtime": 11.2964,
|
| 35539 |
+
"eval_samples_per_second": 50.901,
|
| 35540 |
+
"eval_steps_per_second": 12.747,
|
| 35541 |
"num_input_tokens_seen": 29434784,
|
| 35542 |
"step": 21600
|
| 35543 |
},
|
|
|
|
| 35864 |
{
|
| 35865 |
"epoch": 67.28438948995363,
|
| 35866 |
"eval_loss": 1.6700409650802612,
|
| 35867 |
+
"eval_runtime": 11.3083,
|
| 35868 |
+
"eval_samples_per_second": 50.847,
|
| 35869 |
+
"eval_steps_per_second": 12.734,
|
| 35870 |
"num_input_tokens_seen": 29706800,
|
| 35871 |
"step": 21800
|
| 35872 |
},
|
|
|
|
| 36193 |
{
|
| 36194 |
"epoch": 67.90262751159196,
|
| 36195 |
"eval_loss": 1.6916232109069824,
|
| 36196 |
+
"eval_runtime": 11.2968,
|
| 36197 |
+
"eval_samples_per_second": 50.899,
|
| 36198 |
+
"eval_steps_per_second": 12.747,
|
| 36199 |
"num_input_tokens_seen": 29980240,
|
| 36200 |
"step": 22000
|
| 36201 |
},
|
|
|
|
| 36522 |
{
|
| 36523 |
"epoch": 68.5193199381762,
|
| 36524 |
"eval_loss": 1.7332632541656494,
|
| 36525 |
+
"eval_runtime": 11.2865,
|
| 36526 |
+
"eval_samples_per_second": 50.946,
|
| 36527 |
+
"eval_steps_per_second": 12.759,
|
| 36528 |
"num_input_tokens_seen": 30250192,
|
| 36529 |
"step": 22200
|
| 36530 |
},
|
|
|
|
| 36851 |
{
|
| 36852 |
"epoch": 69.13601236476043,
|
| 36853 |
"eval_loss": 1.7388529777526855,
|
| 36854 |
+
"eval_runtime": 11.3123,
|
| 36855 |
+
"eval_samples_per_second": 50.83,
|
| 36856 |
+
"eval_steps_per_second": 12.73,
|
| 36857 |
"num_input_tokens_seen": 30522672,
|
| 36858 |
"step": 22400
|
| 36859 |
},
|
|
|
|
| 37180 |
{
|
| 37181 |
"epoch": 69.75425038639877,
|
| 37182 |
"eval_loss": 1.7202584743499756,
|
| 37183 |
+
"eval_runtime": 11.2903,
|
| 37184 |
+
"eval_samples_per_second": 50.928,
|
| 37185 |
+
"eval_steps_per_second": 12.754,
|
| 37186 |
"num_input_tokens_seen": 30795024,
|
| 37187 |
"step": 22600
|
| 37188 |
},
|
|
|
|
| 37509 |
{
|
| 37510 |
"epoch": 70.370942812983,
|
| 37511 |
"eval_loss": 1.7700324058532715,
|
| 37512 |
+
"eval_runtime": 11.3014,
|
| 37513 |
+
"eval_samples_per_second": 50.878,
|
| 37514 |
+
"eval_steps_per_second": 12.742,
|
| 37515 |
"num_input_tokens_seen": 31066544,
|
| 37516 |
"step": 22800
|
| 37517 |
},
|
|
|
|
| 37838 |
{
|
| 37839 |
"epoch": 70.98918083462132,
|
| 37840 |
"eval_loss": 1.769662857055664,
|
| 37841 |
+
"eval_runtime": 11.3053,
|
| 37842 |
+
"eval_samples_per_second": 50.861,
|
| 37843 |
+
"eval_steps_per_second": 12.737,
|
| 37844 |
"num_input_tokens_seen": 31338128,
|
| 37845 |
"step": 23000
|
| 37846 |
},
|
|
|
|
| 38167 |
{
|
| 38168 |
"epoch": 71.60587326120556,
|
| 38169 |
"eval_loss": 1.8099125623703003,
|
| 38170 |
+
"eval_runtime": 11.297,
|
| 38171 |
+
"eval_samples_per_second": 50.898,
|
| 38172 |
+
"eval_steps_per_second": 12.747,
|
| 38173 |
"num_input_tokens_seen": 31609104,
|
| 38174 |
"step": 23200
|
| 38175 |
},
|
|
|
|
| 38496 |
{
|
| 38497 |
"epoch": 72.2225656877898,
|
| 38498 |
"eval_loss": 1.856191873550415,
|
| 38499 |
+
"eval_runtime": 11.2897,
|
| 38500 |
+
"eval_samples_per_second": 50.931,
|
| 38501 |
+
"eval_steps_per_second": 12.755,
|
| 38502 |
"num_input_tokens_seen": 31881424,
|
| 38503 |
"step": 23400
|
| 38504 |
},
|
|
|
|
| 38825 |
{
|
| 38826 |
"epoch": 72.84080370942813,
|
| 38827 |
"eval_loss": 1.7837176322937012,
|
| 38828 |
+
"eval_runtime": 11.3065,
|
| 38829 |
+
"eval_samples_per_second": 50.856,
|
| 38830 |
+
"eval_steps_per_second": 12.736,
|
| 38831 |
"num_input_tokens_seen": 32155024,
|
| 38832 |
"step": 23600
|
| 38833 |
},
|
|
|
|
| 39154 |
{
|
| 39155 |
"epoch": 73.45749613601237,
|
| 39156 |
"eval_loss": 1.8125648498535156,
|
| 39157 |
+
"eval_runtime": 11.2948,
|
| 39158 |
+
"eval_samples_per_second": 50.909,
|
| 39159 |
+
"eval_steps_per_second": 12.749,
|
| 39160 |
"num_input_tokens_seen": 32425312,
|
| 39161 |
"step": 23800
|
| 39162 |
},
|
|
|
|
| 39483 |
{
|
| 39484 |
"epoch": 74.0741885625966,
|
| 39485 |
"eval_loss": 1.8575142621994019,
|
| 39486 |
+
"eval_runtime": 11.2926,
|
| 39487 |
+
"eval_samples_per_second": 50.918,
|
| 39488 |
+
"eval_steps_per_second": 12.752,
|
| 39489 |
"num_input_tokens_seen": 32698784,
|
| 39490 |
"step": 24000
|
| 39491 |
},
|
|
|
|
| 39812 |
{
|
| 39813 |
"epoch": 74.69242658423494,
|
| 39814 |
"eval_loss": 1.8753187656402588,
|
| 39815 |
+
"eval_runtime": 11.2917,
|
| 39816 |
+
"eval_samples_per_second": 50.922,
|
| 39817 |
+
"eval_steps_per_second": 12.753,
|
| 39818 |
"num_input_tokens_seen": 32974144,
|
| 39819 |
"step": 24200
|
| 39820 |
},
|
|
|
|
| 40141 |
{
|
| 40142 |
"epoch": 75.30911901081916,
|
| 40143 |
"eval_loss": 1.9167370796203613,
|
| 40144 |
+
"eval_runtime": 11.3077,
|
| 40145 |
+
"eval_samples_per_second": 50.85,
|
| 40146 |
+
"eval_steps_per_second": 12.735,
|
| 40147 |
"num_input_tokens_seen": 33245216,
|
| 40148 |
"step": 24400
|
| 40149 |
},
|
|
|
|
| 40470 |
{
|
| 40471 |
"epoch": 75.9273570324575,
|
| 40472 |
"eval_loss": 1.1968048810958862,
|
| 40473 |
+
"eval_runtime": 11.3303,
|
| 40474 |
+
"eval_samples_per_second": 50.749,
|
| 40475 |
+
"eval_steps_per_second": 12.709,
|
| 40476 |
"num_input_tokens_seen": 33517088,
|
| 40477 |
"step": 24600
|
| 40478 |
},
|
|
|
|
| 40799 |
{
|
| 40800 |
"epoch": 76.54404945904173,
|
| 40801 |
"eval_loss": 1.3781951665878296,
|
| 40802 |
+
"eval_runtime": 11.2957,
|
| 40803 |
+
"eval_samples_per_second": 50.904,
|
| 40804 |
+
"eval_steps_per_second": 12.748,
|
| 40805 |
"num_input_tokens_seen": 33788432,
|
| 40806 |
"step": 24800
|
| 40807 |
},
|
|
|
|
| 41128 |
{
|
| 41129 |
"epoch": 77.16074188562597,
|
| 41130 |
"eval_loss": 1.5010449886322021,
|
| 41131 |
+
"eval_runtime": 11.327,
|
| 41132 |
+
"eval_samples_per_second": 50.763,
|
| 41133 |
+
"eval_steps_per_second": 12.713,
|
| 41134 |
"num_input_tokens_seen": 34060416,
|
| 41135 |
"step": 25000
|
| 41136 |
},
|
|
|
|
| 41457 |
{
|
| 41458 |
"epoch": 77.7789799072643,
|
| 41459 |
"eval_loss": 1.5149627923965454,
|
| 41460 |
+
"eval_runtime": 11.2979,
|
| 41461 |
+
"eval_samples_per_second": 50.894,
|
| 41462 |
+
"eval_steps_per_second": 12.746,
|
| 41463 |
"num_input_tokens_seen": 34333408,
|
| 41464 |
"step": 25200
|
| 41465 |
},
|
|
|
|
| 41786 |
{
|
| 41787 |
"epoch": 78.39567233384854,
|
| 41788 |
"eval_loss": 1.6160272359848022,
|
| 41789 |
+
"eval_runtime": 11.2891,
|
| 41790 |
+
"eval_samples_per_second": 50.934,
|
| 41791 |
+
"eval_steps_per_second": 12.756,
|
| 41792 |
"num_input_tokens_seen": 34605392,
|
| 41793 |
"step": 25400
|
| 41794 |
},
|
|
|
|
| 42115 |
{
|
| 42116 |
"epoch": 79.01236476043276,
|
| 42117 |
"eval_loss": 1.5819573402404785,
|
| 42118 |
+
"eval_runtime": 11.3204,
|
| 42119 |
+
"eval_samples_per_second": 50.793,
|
| 42120 |
+
"eval_steps_per_second": 12.72,
|
| 42121 |
"num_input_tokens_seen": 34879536,
|
| 42122 |
"step": 25600
|
| 42123 |
},
|
|
|
|
| 42444 |
{
|
| 42445 |
"epoch": 79.6306027820711,
|
| 42446 |
"eval_loss": 1.651304841041565,
|
| 42447 |
+
"eval_runtime": 11.2935,
|
| 42448 |
+
"eval_samples_per_second": 50.914,
|
| 42449 |
+
"eval_steps_per_second": 12.751,
|
| 42450 |
"num_input_tokens_seen": 35153488,
|
| 42451 |
"step": 25800
|
| 42452 |
},
|
|
|
|
| 42773 |
{
|
| 42774 |
"epoch": 80.24729520865533,
|
| 42775 |
"eval_loss": 1.6964157819747925,
|
| 42776 |
+
"eval_runtime": 11.2883,
|
| 42777 |
+
"eval_samples_per_second": 50.937,
|
| 42778 |
+
"eval_steps_per_second": 12.757,
|
| 42779 |
"num_input_tokens_seen": 35424912,
|
| 42780 |
"step": 26000
|
| 42781 |
},
|
|
|
|
| 43102 |
{
|
| 43103 |
"epoch": 80.86553323029366,
|
| 43104 |
"eval_loss": 1.7483088970184326,
|
| 43105 |
+
"eval_runtime": 11.3043,
|
| 43106 |
+
"eval_samples_per_second": 50.865,
|
| 43107 |
+
"eval_steps_per_second": 12.738,
|
| 43108 |
"num_input_tokens_seen": 35698064,
|
| 43109 |
"step": 26200
|
| 43110 |
},
|
|
|
|
| 43431 |
{
|
| 43432 |
"epoch": 81.4822256568779,
|
| 43433 |
"eval_loss": 1.7370902299880981,
|
| 43434 |
+
"eval_runtime": 11.2996,
|
| 43435 |
+
"eval_samples_per_second": 50.887,
|
| 43436 |
+
"eval_steps_per_second": 12.744,
|
| 43437 |
"num_input_tokens_seen": 35968160,
|
| 43438 |
"step": 26400
|
| 43439 |
},
|
|
|
|
| 43760 |
{
|
| 43761 |
"epoch": 82.09891808346214,
|
| 43762 |
"eval_loss": 1.7790963649749756,
|
| 43763 |
+
"eval_runtime": 11.2991,
|
| 43764 |
+
"eval_samples_per_second": 50.889,
|
| 43765 |
+
"eval_steps_per_second": 12.744,
|
| 43766 |
"num_input_tokens_seen": 36240928,
|
| 43767 |
"step": 26600
|
| 43768 |
},
|
|
|
|
| 44089 |
{
|
| 44090 |
"epoch": 82.71715610510046,
|
| 44091 |
"eval_loss": 1.772797703742981,
|
| 44092 |
+
"eval_runtime": 11.2933,
|
| 44093 |
+
"eval_samples_per_second": 50.915,
|
| 44094 |
+
"eval_steps_per_second": 12.751,
|
| 44095 |
"num_input_tokens_seen": 36514208,
|
| 44096 |
"step": 26800
|
| 44097 |
},
|
|
|
|
| 44418 |
{
|
| 44419 |
"epoch": 83.3338485316847,
|
| 44420 |
"eval_loss": 1.7722996473312378,
|
| 44421 |
+
"eval_runtime": 11.3164,
|
| 44422 |
+
"eval_samples_per_second": 50.811,
|
| 44423 |
+
"eval_steps_per_second": 12.725,
|
| 44424 |
"num_input_tokens_seen": 36785136,
|
| 44425 |
"step": 27000
|
| 44426 |
},
|
|
|
|
| 44747 |
{
|
| 44748 |
"epoch": 83.95208655332303,
|
| 44749 |
"eval_loss": 1.8002300262451172,
|
| 44750 |
+
"eval_runtime": 11.2936,
|
| 44751 |
+
"eval_samples_per_second": 50.914,
|
| 44752 |
+
"eval_steps_per_second": 12.751,
|
| 44753 |
"num_input_tokens_seen": 37061648,
|
| 44754 |
"step": 27200
|
| 44755 |
},
|
|
|
|
| 45076 |
{
|
| 45077 |
"epoch": 84.56877897990726,
|
| 45078 |
"eval_loss": 1.804320216178894,
|
| 45079 |
+
"eval_runtime": 11.2862,
|
| 45080 |
+
"eval_samples_per_second": 50.947,
|
| 45081 |
+
"eval_steps_per_second": 12.759,
|
| 45082 |
"num_input_tokens_seen": 37333648,
|
| 45083 |
"step": 27400
|
| 45084 |
},
|
|
|
|
| 45405 |
{
|
| 45406 |
"epoch": 85.1854714064915,
|
| 45407 |
"eval_loss": 1.8354555368423462,
|
| 45408 |
+
"eval_runtime": 11.3126,
|
| 45409 |
+
"eval_samples_per_second": 50.828,
|
| 45410 |
+
"eval_steps_per_second": 12.729,
|
| 45411 |
"num_input_tokens_seen": 37605184,
|
| 45412 |
"step": 27600
|
| 45413 |
},
|
|
|
|
| 45734 |
{
|
| 45735 |
"epoch": 85.80370942812984,
|
| 45736 |
"eval_loss": 1.8400607109069824,
|
| 45737 |
+
"eval_runtime": 11.2893,
|
| 45738 |
+
"eval_samples_per_second": 50.933,
|
| 45739 |
+
"eval_steps_per_second": 12.755,
|
| 45740 |
"num_input_tokens_seen": 37875360,
|
| 45741 |
"step": 27800
|
| 45742 |
},
|
|
|
|
| 46063 |
{
|
| 46064 |
"epoch": 86.42040185471407,
|
| 46065 |
"eval_loss": 1.8688201904296875,
|
| 46066 |
+
"eval_runtime": 11.2959,
|
| 46067 |
+
"eval_samples_per_second": 50.904,
|
| 46068 |
+
"eval_steps_per_second": 12.748,
|
| 46069 |
"num_input_tokens_seen": 38150208,
|
| 46070 |
"step": 28000
|
| 46071 |
},
|
|
|
|
| 46392 |
{
|
| 46393 |
"epoch": 87.0370942812983,
|
| 46394 |
"eval_loss": 1.810387134552002,
|
| 46395 |
+
"eval_runtime": 11.3075,
|
| 46396 |
+
"eval_samples_per_second": 50.851,
|
| 46397 |
+
"eval_steps_per_second": 12.735,
|
| 46398 |
"num_input_tokens_seen": 38422048,
|
| 46399 |
"step": 28200
|
| 46400 |
},
|
|
|
|
| 46721 |
{
|
| 46722 |
"epoch": 87.65533230293663,
|
| 46723 |
"eval_loss": 1.8730015754699707,
|
| 46724 |
+
"eval_runtime": 11.2946,
|
| 46725 |
+
"eval_samples_per_second": 50.909,
|
| 46726 |
+
"eval_steps_per_second": 12.749,
|
| 46727 |
"num_input_tokens_seen": 38692224,
|
| 46728 |
"step": 28400
|
| 46729 |
},
|
|
|
|
| 47050 |
{
|
| 47051 |
"epoch": 88.27202472952087,
|
| 47052 |
"eval_loss": 1.8786824941635132,
|
| 47053 |
+
"eval_runtime": 11.2989,
|
| 47054 |
+
"eval_samples_per_second": 50.89,
|
| 47055 |
+
"eval_steps_per_second": 12.745,
|
| 47056 |
"num_input_tokens_seen": 38964176,
|
| 47057 |
"step": 28600
|
| 47058 |
},
|
|
|
|
| 47379 |
{
|
| 47380 |
"epoch": 88.8902627511592,
|
| 47381 |
"eval_loss": 1.8849008083343506,
|
| 47382 |
+
"eval_runtime": 11.2971,
|
| 47383 |
+
"eval_samples_per_second": 50.898,
|
| 47384 |
+
"eval_steps_per_second": 12.747,
|
| 47385 |
"num_input_tokens_seen": 39235184,
|
| 47386 |
"step": 28800
|
| 47387 |
},
|
|
|
|
| 47708 |
{
|
| 47709 |
"epoch": 89.50695517774344,
|
| 47710 |
"eval_loss": 1.9232840538024902,
|
| 47711 |
+
"eval_runtime": 11.3151,
|
| 47712 |
+
"eval_samples_per_second": 50.817,
|
| 47713 |
+
"eval_steps_per_second": 12.726,
|
| 47714 |
"num_input_tokens_seen": 39507520,
|
| 47715 |
"step": 29000
|
| 47716 |
},
|
|
|
|
| 48037 |
{
|
| 48038 |
"epoch": 90.12364760432767,
|
| 48039 |
"eval_loss": 1.9127227067947388,
|
| 48040 |
+
"eval_runtime": 11.3026,
|
| 48041 |
+
"eval_samples_per_second": 50.873,
|
| 48042 |
+
"eval_steps_per_second": 12.74,
|
| 48043 |
"num_input_tokens_seen": 39779328,
|
| 48044 |
"step": 29200
|
| 48045 |
},
|
|
|
|
| 48366 |
{
|
| 48367 |
"epoch": 90.74188562596599,
|
| 48368 |
"eval_loss": 1.8981382846832275,
|
| 48369 |
+
"eval_runtime": 11.2948,
|
| 48370 |
+
"eval_samples_per_second": 50.908,
|
| 48371 |
+
"eval_steps_per_second": 12.749,
|
| 48372 |
"num_input_tokens_seen": 40051520,
|
| 48373 |
"step": 29400
|
| 48374 |
},
|
|
|
|
| 48695 |
{
|
| 48696 |
"epoch": 91.35857805255023,
|
| 48697 |
"eval_loss": 1.9302953481674194,
|
| 48698 |
+
"eval_runtime": 11.3008,
|
| 48699 |
+
"eval_samples_per_second": 50.881,
|
| 48700 |
+
"eval_steps_per_second": 12.742,
|
| 48701 |
"num_input_tokens_seen": 40322576,
|
| 48702 |
"step": 29600
|
| 48703 |
},
|
|
|
|
| 49024 |
{
|
| 49025 |
"epoch": 91.97681607418856,
|
| 49026 |
"eval_loss": 1.9179975986480713,
|
| 49027 |
+
"eval_runtime": 11.2952,
|
| 49028 |
+
"eval_samples_per_second": 50.907,
|
| 49029 |
+
"eval_steps_per_second": 12.749,
|
| 49030 |
"num_input_tokens_seen": 40596016,
|
| 49031 |
"step": 29800
|
| 49032 |
},
|
|
|
|
| 49353 |
{
|
| 49354 |
"epoch": 92.5935085007728,
|
| 49355 |
"eval_loss": 1.92044997215271,
|
| 49356 |
+
"eval_runtime": 11.297,
|
| 49357 |
+
"eval_samples_per_second": 50.898,
|
| 49358 |
+
"eval_steps_per_second": 12.747,
|
| 49359 |
"num_input_tokens_seen": 40867568,
|
| 49360 |
"step": 30000
|
| 49361 |
},
|
|
|
|
| 49682 |
{
|
| 49683 |
"epoch": 93.21020092735704,
|
| 49684 |
"eval_loss": 1.9711647033691406,
|
| 49685 |
+
"eval_runtime": 11.3063,
|
| 49686 |
+
"eval_samples_per_second": 50.857,
|
| 49687 |
+
"eval_steps_per_second": 12.736,
|
| 49688 |
"num_input_tokens_seen": 41140848,
|
| 49689 |
"step": 30200
|
| 49690 |
},
|
|
|
|
| 50011 |
{
|
| 50012 |
"epoch": 93.82843894899537,
|
| 50013 |
"eval_loss": 1.976061463356018,
|
| 50014 |
+
"eval_runtime": 11.2906,
|
| 50015 |
+
"eval_samples_per_second": 50.927,
|
| 50016 |
+
"eval_steps_per_second": 12.754,
|
| 50017 |
"num_input_tokens_seen": 41412848,
|
| 50018 |
"step": 30400
|
| 50019 |
},
|
|
|
|
| 50340 |
{
|
| 50341 |
"epoch": 94.44513137557959,
|
| 50342 |
"eval_loss": 1.958508849143982,
|
| 50343 |
+
"eval_runtime": 11.2928,
|
| 50344 |
+
"eval_samples_per_second": 50.917,
|
| 50345 |
+
"eval_steps_per_second": 12.751,
|
| 50346 |
"num_input_tokens_seen": 41683920,
|
| 50347 |
"step": 30600
|
| 50348 |
},
|
|
|
|
| 50669 |
{
|
| 50670 |
"epoch": 95.06182380216383,
|
| 50671 |
"eval_loss": 1.9966574907302856,
|
| 50672 |
+
"eval_runtime": 11.302,
|
| 50673 |
+
"eval_samples_per_second": 50.876,
|
| 50674 |
+
"eval_steps_per_second": 12.741,
|
| 50675 |
"num_input_tokens_seen": 41959008,
|
| 50676 |
"step": 30800
|
| 50677 |
},
|
|
|
|
| 50998 |
{
|
| 50999 |
"epoch": 95.68006182380216,
|
| 51000 |
"eval_loss": 1.9950237274169922,
|
| 51001 |
+
"eval_runtime": 11.3056,
|
| 51002 |
+
"eval_samples_per_second": 50.86,
|
| 51003 |
+
"eval_steps_per_second": 12.737,
|
| 51004 |
"num_input_tokens_seen": 42231520,
|
| 51005 |
"step": 31000
|
| 51006 |
},
|
|
|
|
| 51327 |
{
|
| 51328 |
"epoch": 96.2967542503864,
|
| 51329 |
"eval_loss": 1.9839365482330322,
|
| 51330 |
+
"eval_runtime": 11.293,
|
| 51331 |
+
"eval_samples_per_second": 50.916,
|
| 51332 |
+
"eval_steps_per_second": 12.751,
|
| 51333 |
"num_input_tokens_seen": 42502416,
|
| 51334 |
"step": 31200
|
| 51335 |
},
|
|
|
|
| 51656 |
{
|
| 51657 |
"epoch": 96.91499227202473,
|
| 51658 |
"eval_loss": 2.004136323928833,
|
| 51659 |
+
"eval_runtime": 11.2961,
|
| 51660 |
+
"eval_samples_per_second": 50.902,
|
| 51661 |
+
"eval_steps_per_second": 12.748,
|
| 51662 |
"num_input_tokens_seen": 42776304,
|
| 51663 |
"step": 31400
|
| 51664 |
},
|
|
|
|
| 51985 |
{
|
| 51986 |
"epoch": 97.53168469860897,
|
| 51987 |
"eval_loss": 2.016206979751587,
|
| 51988 |
+
"eval_runtime": 11.3124,
|
| 51989 |
+
"eval_samples_per_second": 50.829,
|
| 51990 |
+
"eval_steps_per_second": 12.729,
|
| 51991 |
"num_input_tokens_seen": 43048176,
|
| 51992 |
"step": 31600
|
| 51993 |
},
|
|
|
|
| 52314 |
{
|
| 52315 |
"epoch": 98.14837712519319,
|
| 52316 |
"eval_loss": 2.010310173034668,
|
| 52317 |
+
"eval_runtime": 11.3207,
|
| 52318 |
+
"eval_samples_per_second": 50.792,
|
| 52319 |
+
"eval_steps_per_second": 12.72,
|
| 52320 |
"num_input_tokens_seen": 43320144,
|
| 52321 |
"step": 31800
|
| 52322 |
},
|
|
|
|
| 52643 |
{
|
| 52644 |
"epoch": 98.76661514683153,
|
| 52645 |
"eval_loss": 2.008124589920044,
|
| 52646 |
+
"eval_runtime": 11.2928,
|
| 52647 |
+
"eval_samples_per_second": 50.917,
|
| 52648 |
+
"eval_steps_per_second": 12.751,
|
| 52649 |
"num_input_tokens_seen": 43591728,
|
| 52650 |
"step": 32000
|
| 52651 |
},
|
|
|
|
| 52972 |
{
|
| 52973 |
"epoch": 99.38330757341576,
|
| 52974 |
"eval_loss": 2.027338981628418,
|
| 52975 |
+
"eval_runtime": 11.3086,
|
| 52976 |
+
"eval_samples_per_second": 50.846,
|
| 52977 |
+
"eval_steps_per_second": 12.734,
|
| 52978 |
"num_input_tokens_seen": 43866048,
|
| 52979 |
"step": 32200
|
| 52980 |
},
|
|
|
|
| 53301 |
{
|
| 53302 |
"epoch": 100.0,
|
| 53303 |
"eval_loss": 2.034688711166382,
|
| 53304 |
+
"eval_runtime": 11.2867,
|
| 53305 |
+
"eval_samples_per_second": 50.945,
|
| 53306 |
+
"eval_steps_per_second": 12.758,
|
| 53307 |
"num_input_tokens_seen": 44137040,
|
| 53308 |
"step": 32400
|
| 53309 |
},
|
|
|
|
| 53630 |
{
|
| 53631 |
"epoch": 100.61823802163833,
|
| 53632 |
"eval_loss": 2.052443265914917,
|
| 53633 |
+
"eval_runtime": 11.2954,
|
| 53634 |
+
"eval_samples_per_second": 50.905,
|
| 53635 |
+
"eval_steps_per_second": 12.749,
|
| 53636 |
"num_input_tokens_seen": 44408848,
|
| 53637 |
"step": 32600
|
| 53638 |
},
|
|
|
|
| 53959 |
{
|
| 53960 |
"epoch": 101.23493044822257,
|
| 53961 |
"eval_loss": 2.067155599594116,
|
| 53962 |
+
"eval_runtime": 11.3248,
|
| 53963 |
+
"eval_samples_per_second": 50.774,
|
| 53964 |
+
"eval_steps_per_second": 12.715,
|
| 53965 |
"num_input_tokens_seen": 44682912,
|
| 53966 |
"step": 32800
|
| 53967 |
},
|
|
|
|
| 54288 |
{
|
| 54289 |
"epoch": 101.85316846986089,
|
| 54290 |
"eval_loss": 2.042910575866699,
|
| 54291 |
+
"eval_runtime": 11.2924,
|
| 54292 |
+
"eval_samples_per_second": 50.919,
|
| 54293 |
+
"eval_steps_per_second": 12.752,
|
| 54294 |
"num_input_tokens_seen": 44956000,
|
| 54295 |
"step": 33000
|
| 54296 |
},
|
|
|
|
| 54617 |
{
|
| 54618 |
"epoch": 102.46986089644513,
|
| 54619 |
"eval_loss": 2.049968719482422,
|
| 54620 |
+
"eval_runtime": 11.2991,
|
| 54621 |
+
"eval_samples_per_second": 50.889,
|
| 54622 |
+
"eval_steps_per_second": 12.744,
|
| 54623 |
"num_input_tokens_seen": 45227824,
|
| 54624 |
"step": 33200
|
| 54625 |
},
|
|
|
|
| 54946 |
{
|
| 54947 |
"epoch": 103.08655332302936,
|
| 54948 |
"eval_loss": 2.047577142715454,
|
| 54949 |
+
"eval_runtime": 11.2983,
|
| 54950 |
+
"eval_samples_per_second": 50.893,
|
| 54951 |
+
"eval_steps_per_second": 12.745,
|
| 54952 |
"num_input_tokens_seen": 45498320,
|
| 54953 |
"step": 33400
|
| 54954 |
},
|
|
|
|
| 55275 |
{
|
| 55276 |
"epoch": 103.7047913446677,
|
| 55277 |
"eval_loss": 2.0636227130889893,
|
| 55278 |
+
"eval_runtime": 11.3222,
|
| 55279 |
+
"eval_samples_per_second": 50.785,
|
| 55280 |
+
"eval_steps_per_second": 12.718,
|
| 55281 |
"num_input_tokens_seen": 45773648,
|
| 55282 |
"step": 33600
|
| 55283 |
},
|
|
|
|
| 55604 |
{
|
| 55605 |
"epoch": 104.32148377125193,
|
| 55606 |
"eval_loss": 2.080799102783203,
|
| 55607 |
+
"eval_runtime": 11.3037,
|
| 55608 |
+
"eval_samples_per_second": 50.868,
|
| 55609 |
+
"eval_steps_per_second": 12.739,
|
| 55610 |
"num_input_tokens_seen": 46044128,
|
| 55611 |
"step": 33800
|
| 55612 |
},
|
|
|
|
| 55933 |
{
|
| 55934 |
"epoch": 104.93972179289027,
|
| 55935 |
"eval_loss": 2.0720911026000977,
|
| 55936 |
+
"eval_runtime": 11.3001,
|
| 55937 |
+
"eval_samples_per_second": 50.885,
|
| 55938 |
+
"eval_steps_per_second": 12.743,
|
| 55939 |
"num_input_tokens_seen": 46317504,
|
| 55940 |
"step": 34000
|
| 55941 |
},
|
|
|
|
| 56262 |
{
|
| 56263 |
"epoch": 105.5564142194745,
|
| 56264 |
"eval_loss": 2.082965612411499,
|
| 56265 |
+
"eval_runtime": 11.312,
|
| 56266 |
+
"eval_samples_per_second": 50.831,
|
| 56267 |
+
"eval_steps_per_second": 12.73,
|
| 56268 |
"num_input_tokens_seen": 46589024,
|
| 56269 |
"step": 34200
|
| 56270 |
},
|
|
|
|
| 56591 |
{
|
| 56592 |
"epoch": 106.17310664605873,
|
| 56593 |
"eval_loss": 2.094463348388672,
|
| 56594 |
+
"eval_runtime": 11.2952,
|
| 56595 |
+
"eval_samples_per_second": 50.906,
|
| 56596 |
+
"eval_steps_per_second": 12.749,
|
| 56597 |
"num_input_tokens_seen": 46863680,
|
| 56598 |
"step": 34400
|
| 56599 |
},
|
|
|
|
| 56920 |
{
|
| 56921 |
"epoch": 106.79134466769706,
|
| 56922 |
"eval_loss": 2.0966665744781494,
|
| 56923 |
+
"eval_runtime": 11.2915,
|
| 56924 |
+
"eval_samples_per_second": 50.923,
|
| 56925 |
+
"eval_steps_per_second": 12.753,
|
| 56926 |
"num_input_tokens_seen": 47135520,
|
| 56927 |
"step": 34600
|
| 56928 |
},
|
|
|
|
| 57249 |
{
|
| 57250 |
"epoch": 107.4080370942813,
|
| 57251 |
"eval_loss": 2.1042280197143555,
|
| 57252 |
+
"eval_runtime": 11.3301,
|
| 57253 |
+
"eval_samples_per_second": 50.75,
|
| 57254 |
+
"eval_steps_per_second": 12.709,
|
| 57255 |
"num_input_tokens_seen": 47407056,
|
| 57256 |
"step": 34800
|
| 57257 |
},
|
|
|
|
| 57578 |
{
|
| 57579 |
"epoch": 108.02472952086553,
|
| 57580 |
"eval_loss": 2.096859931945801,
|
| 57581 |
+
"eval_runtime": 11.2877,
|
| 57582 |
+
"eval_samples_per_second": 50.94,
|
| 57583 |
+
"eval_steps_per_second": 12.757,
|
| 57584 |
"num_input_tokens_seen": 47680112,
|
| 57585 |
"step": 35000
|
| 57586 |
},
|
|
|
|
| 57907 |
{
|
| 57908 |
"epoch": 108.64296754250387,
|
| 57909 |
"eval_loss": 2.1074352264404297,
|
| 57910 |
+
"eval_runtime": 11.2944,
|
| 57911 |
+
"eval_samples_per_second": 50.91,
|
| 57912 |
+
"eval_steps_per_second": 12.75,
|
| 57913 |
"num_input_tokens_seen": 47951632,
|
| 57914 |
"step": 35200
|
| 57915 |
},
|
|
|
|
| 58236 |
{
|
| 58237 |
"epoch": 109.2596599690881,
|
| 58238 |
"eval_loss": 2.110261917114258,
|
| 58239 |
+
"eval_runtime": 11.3012,
|
| 58240 |
+
"eval_samples_per_second": 50.879,
|
| 58241 |
+
"eval_steps_per_second": 12.742,
|
| 58242 |
"num_input_tokens_seen": 48224016,
|
| 58243 |
"step": 35400
|
| 58244 |
},
|
|
|
|
| 58565 |
{
|
| 58566 |
"epoch": 109.87789799072642,
|
| 58567 |
"eval_loss": 2.1071767807006836,
|
| 58568 |
+
"eval_runtime": 11.3117,
|
| 58569 |
+
"eval_samples_per_second": 50.832,
|
| 58570 |
+
"eval_steps_per_second": 12.73,
|
| 58571 |
"num_input_tokens_seen": 48497072,
|
| 58572 |
"step": 35600
|
| 58573 |
},
|
|
|
|
| 58894 |
{
|
| 58895 |
"epoch": 110.49459041731066,
|
| 58896 |
"eval_loss": 2.1081290245056152,
|
| 58897 |
+
"eval_runtime": 11.2959,
|
| 58898 |
+
"eval_samples_per_second": 50.904,
|
| 58899 |
+
"eval_steps_per_second": 12.748,
|
| 58900 |
"num_input_tokens_seen": 48768624,
|
| 58901 |
"step": 35800
|
| 58902 |
},
|
|
|
|
| 59223 |
{
|
| 59224 |
"epoch": 111.1112828438949,
|
| 59225 |
"eval_loss": 2.1115777492523193,
|
| 59226 |
+
"eval_runtime": 11.2933,
|
| 59227 |
+
"eval_samples_per_second": 50.915,
|
| 59228 |
+
"eval_steps_per_second": 12.751,
|
| 59229 |
"num_input_tokens_seen": 49041488,
|
| 59230 |
"step": 36000
|
| 59231 |
},
|
|
|
|
| 59552 |
{
|
| 59553 |
"epoch": 111.72952086553323,
|
| 59554 |
"eval_loss": 2.1243085861206055,
|
| 59555 |
+
"eval_runtime": 11.3106,
|
| 59556 |
+
"eval_samples_per_second": 50.837,
|
| 59557 |
+
"eval_steps_per_second": 12.731,
|
| 59558 |
"num_input_tokens_seen": 49314352,
|
| 59559 |
"step": 36200
|
| 59560 |
},
|
|
|
|
| 59881 |
{
|
| 59882 |
"epoch": 112.34621329211747,
|
| 59883 |
"eval_loss": 2.1214993000030518,
|
| 59884 |
+
"eval_runtime": 11.297,
|
| 59885 |
+
"eval_samples_per_second": 50.898,
|
| 59886 |
+
"eval_steps_per_second": 12.747,
|
| 59887 |
"num_input_tokens_seen": 49584848,
|
| 59888 |
"step": 36400
|
| 59889 |
},
|
|
|
|
| 60210 |
{
|
| 60211 |
"epoch": 112.9644513137558,
|
| 60212 |
"eval_loss": 2.1198999881744385,
|
| 60213 |
+
"eval_runtime": 11.3061,
|
| 60214 |
+
"eval_samples_per_second": 50.857,
|
| 60215 |
+
"eval_steps_per_second": 12.736,
|
| 60216 |
"num_input_tokens_seen": 49858864,
|
| 60217 |
"step": 36600
|
| 60218 |
},
|
|
|
|
| 60539 |
{
|
| 60540 |
"epoch": 113.58114374034002,
|
| 60541 |
"eval_loss": 2.129167079925537,
|
| 60542 |
+
"eval_runtime": 11.3074,
|
| 60543 |
+
"eval_samples_per_second": 50.852,
|
| 60544 |
+
"eval_steps_per_second": 12.735,
|
| 60545 |
"num_input_tokens_seen": 50130000,
|
| 60546 |
"step": 36800
|
| 60547 |
},
|
|
|
|
| 60868 |
{
|
| 60869 |
"epoch": 114.19783616692426,
|
| 60870 |
"eval_loss": 2.127554178237915,
|
| 60871 |
+
"eval_runtime": 11.3023,
|
| 60872 |
+
"eval_samples_per_second": 50.875,
|
| 60873 |
+
"eval_steps_per_second": 12.741,
|
| 60874 |
"num_input_tokens_seen": 50404128,
|
| 60875 |
"step": 37000
|
| 60876 |
},
|
|
|
|
| 61197 |
{
|
| 61198 |
"epoch": 114.8160741885626,
|
| 61199 |
"eval_loss": 2.1345906257629395,
|
| 61200 |
+
"eval_runtime": 11.2978,
|
| 61201 |
+
"eval_samples_per_second": 50.895,
|
| 61202 |
+
"eval_steps_per_second": 12.746,
|
| 61203 |
"num_input_tokens_seen": 50678112,
|
| 61204 |
"step": 37200
|
| 61205 |
},
|
|
|
|
| 61526 |
{
|
| 61527 |
"epoch": 115.43276661514683,
|
| 61528 |
"eval_loss": 2.132272481918335,
|
| 61529 |
+
"eval_runtime": 11.3176,
|
| 61530 |
+
"eval_samples_per_second": 50.806,
|
| 61531 |
+
"eval_steps_per_second": 12.724,
|
| 61532 |
"num_input_tokens_seen": 50946800,
|
| 61533 |
"step": 37400
|
| 61534 |
},
|
|
|
|
| 61855 |
{
|
| 61856 |
"epoch": 116.04945904173107,
|
| 61857 |
"eval_loss": 2.131873369216919,
|
| 61858 |
+
"eval_runtime": 11.3073,
|
| 61859 |
+
"eval_samples_per_second": 50.852,
|
| 61860 |
+
"eval_steps_per_second": 12.735,
|
| 61861 |
"num_input_tokens_seen": 51219680,
|
| 61862 |
"step": 37600
|
| 61863 |
},
|
|
|
|
| 62184 |
{
|
| 62185 |
"epoch": 116.6676970633694,
|
| 62186 |
"eval_loss": 2.1323955059051514,
|
| 62187 |
+
"eval_runtime": 11.2948,
|
| 62188 |
+
"eval_samples_per_second": 50.908,
|
| 62189 |
+
"eval_steps_per_second": 12.749,
|
| 62190 |
"num_input_tokens_seen": 51492544,
|
| 62191 |
"step": 37800
|
| 62192 |
},
|
|
|
|
| 62513 |
{
|
| 62514 |
"epoch": 117.28438948995363,
|
| 62515 |
"eval_loss": 2.135064125061035,
|
| 62516 |
+
"eval_runtime": 11.3036,
|
| 62517 |
+
"eval_samples_per_second": 50.869,
|
| 62518 |
+
"eval_steps_per_second": 12.739,
|
| 62519 |
"num_input_tokens_seen": 51764160,
|
| 62520 |
"step": 38000
|
| 62521 |
},
|
|
|
|
| 62842 |
{
|
| 62843 |
"epoch": 117.90262751159196,
|
| 62844 |
"eval_loss": 2.134946584701538,
|
| 62845 |
+
"eval_runtime": 11.3035,
|
| 62846 |
+
"eval_samples_per_second": 50.869,
|
| 62847 |
+
"eval_steps_per_second": 12.739,
|
| 62848 |
"num_input_tokens_seen": 52039488,
|
| 62849 |
"step": 38200
|
| 62850 |
},
|
|
|
|
| 63171 |
{
|
| 63172 |
"epoch": 118.5193199381762,
|
| 63173 |
"eval_loss": 2.1382298469543457,
|
| 63174 |
+
"eval_runtime": 11.2966,
|
| 63175 |
+
"eval_samples_per_second": 50.9,
|
| 63176 |
+
"eval_steps_per_second": 12.747,
|
| 63177 |
"num_input_tokens_seen": 52311648,
|
| 63178 |
"step": 38400
|
| 63179 |
},
|
|
|
|
| 63500 |
{
|
| 63501 |
"epoch": 119.13601236476043,
|
| 63502 |
"eval_loss": 2.1389503479003906,
|
| 63503 |
+
"eval_runtime": 11.299,
|
| 63504 |
+
"eval_samples_per_second": 50.89,
|
| 63505 |
+
"eval_steps_per_second": 12.745,
|
| 63506 |
"num_input_tokens_seen": 52584960,
|
| 63507 |
"step": 38600
|
| 63508 |
},
|
|
|
|
| 63829 |
{
|
| 63830 |
"epoch": 119.75425038639877,
|
| 63831 |
"eval_loss": 2.141028642654419,
|
| 63832 |
+
"eval_runtime": 11.2899,
|
| 63833 |
+
"eval_samples_per_second": 50.93,
|
| 63834 |
+
"eval_steps_per_second": 12.755,
|
| 63835 |
"num_input_tokens_seen": 52855712,
|
| 63836 |
"step": 38800
|
| 63837 |
},
|
|
|
|
| 64158 |
{
|
| 64159 |
"epoch": 120.370942812983,
|
| 64160 |
"eval_loss": 2.142845630645752,
|
| 64161 |
+
"eval_runtime": 12.65,
|
| 64162 |
+
"eval_samples_per_second": 45.454,
|
| 64163 |
+
"eval_steps_per_second": 11.383,
|
| 64164 |
"num_input_tokens_seen": 53128480,
|
| 64165 |
"step": 39000
|
| 64166 |
},
|
|
|
|
| 64487 |
{
|
| 64488 |
"epoch": 120.98918083462132,
|
| 64489 |
"eval_loss": 2.142850637435913,
|
| 64490 |
+
"eval_runtime": 11.2942,
|
| 64491 |
+
"eval_samples_per_second": 50.911,
|
| 64492 |
+
"eval_steps_per_second": 12.75,
|
| 64493 |
"num_input_tokens_seen": 53401056,
|
| 64494 |
"step": 39200
|
| 64495 |
},
|
|
|
|
| 64816 |
{
|
| 64817 |
"epoch": 121.60587326120556,
|
| 64818 |
"eval_loss": 2.141234874725342,
|
| 64819 |
+
"eval_runtime": 11.3357,
|
| 64820 |
+
"eval_samples_per_second": 50.725,
|
| 64821 |
+
"eval_steps_per_second": 12.703,
|
| 64822 |
"num_input_tokens_seen": 53673600,
|
| 64823 |
"step": 39400
|
| 64824 |
},
|
|
|
|
| 65145 |
{
|
| 65146 |
"epoch": 122.2225656877898,
|
| 65147 |
"eval_loss": 2.1376492977142334,
|
| 65148 |
+
"eval_runtime": 11.2938,
|
| 65149 |
+
"eval_samples_per_second": 50.913,
|
| 65150 |
+
"eval_steps_per_second": 12.75,
|
| 65151 |
"num_input_tokens_seen": 53943712,
|
| 65152 |
"step": 39600
|
| 65153 |
},
|
|
|
|
| 65474 |
{
|
| 65475 |
"epoch": 122.84080370942813,
|
| 65476 |
"eval_loss": 2.1381330490112305,
|
| 65477 |
+
"eval_runtime": 11.2911,
|
| 65478 |
+
"eval_samples_per_second": 50.925,
|
| 65479 |
+
"eval_steps_per_second": 12.753,
|
| 65480 |
"num_input_tokens_seen": 54217344,
|
| 65481 |
"step": 39800
|
| 65482 |
},
|
|
|
|
| 65803 |
{
|
| 65804 |
"epoch": 123.45749613601237,
|
| 65805 |
"eval_loss": 2.137033700942993,
|
| 65806 |
+
"eval_runtime": 11.2958,
|
| 65807 |
+
"eval_samples_per_second": 50.904,
|
| 65808 |
+
"eval_steps_per_second": 12.748,
|
| 65809 |
"num_input_tokens_seen": 54490336,
|
| 65810 |
"step": 40000
|
| 65811 |
},
|
|
|
|
| 65815 |
"step": 40000,
|
| 65816 |
"total_flos": 2.453675202191819e+18,
|
| 65817 |
"train_loss": 0.10362623064493919,
|
| 65818 |
+
"train_runtime": 29204.064,
|
| 65819 |
+
"train_samples_per_second": 21.915,
|
| 65820 |
+
"train_steps_per_second": 1.37
|
| 65821 |
}
|
| 65822 |
],
|
| 65823 |
"logging_steps": 5,
|