rbelanec committed on
Commit
f73b660
·
verified ·
1 Parent(s): f31ef57

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -2
  2. all_results.json +6 -6
  3. eval_results.json +3 -3
  4. train_results.json +3 -3
  5. trainer_state.json +603 -603
README.md CHANGED
@@ -16,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # train_stsb_1745333591
18
 
19
- This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 2.1370
22
  - Num Input Tokens Seen: 54490336
23
 
24
  ## Model description
 
16
 
17
  # train_stsb_1745333591
18
 
19
+ This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on the stsb dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.5494
22
  - Num Input Tokens Seen: 54490336
23
 
24
  ## Model description
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 123.45749613601237,
3
  "eval_loss": 0.5493518114089966,
4
- "eval_runtime": 11.2648,
5
- "eval_samples_per_second": 51.044,
6
- "eval_steps_per_second": 12.783,
7
  "num_input_tokens_seen": 54490336,
8
  "total_flos": 2.453675202191819e+18,
9
  "train_loss": 0.10362623064493919,
10
- "train_runtime": 29327.4396,
11
- "train_samples_per_second": 21.823,
12
- "train_steps_per_second": 1.364
13
  }
 
1
  {
2
  "epoch": 123.45749613601237,
3
  "eval_loss": 0.5493518114089966,
4
+ "eval_runtime": 11.3055,
5
+ "eval_samples_per_second": 50.86,
6
+ "eval_steps_per_second": 12.737,
7
  "num_input_tokens_seen": 54490336,
8
  "total_flos": 2.453675202191819e+18,
9
  "train_loss": 0.10362623064493919,
10
+ "train_runtime": 29204.064,
11
+ "train_samples_per_second": 21.915,
12
+ "train_steps_per_second": 1.37
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 123.45749613601237,
3
  "eval_loss": 0.5493518114089966,
4
- "eval_runtime": 11.2648,
5
- "eval_samples_per_second": 51.044,
6
- "eval_steps_per_second": 12.783,
7
  "num_input_tokens_seen": 54490336
8
  }
 
1
  {
2
  "epoch": 123.45749613601237,
3
  "eval_loss": 0.5493518114089966,
4
+ "eval_runtime": 11.3055,
5
+ "eval_samples_per_second": 50.86,
6
+ "eval_steps_per_second": 12.737,
7
  "num_input_tokens_seen": 54490336
8
  }
train_results.json CHANGED
@@ -3,7 +3,7 @@
3
  "num_input_tokens_seen": 54490336,
4
  "total_flos": 2.453675202191819e+18,
5
  "train_loss": 0.10362623064493919,
6
- "train_runtime": 29327.4396,
7
- "train_samples_per_second": 21.823,
8
- "train_steps_per_second": 1.364
9
  }
 
3
  "num_input_tokens_seen": 54490336,
4
  "total_flos": 2.453675202191819e+18,
5
  "train_loss": 0.10362623064493919,
6
+ "train_runtime": 29204.064,
7
+ "train_samples_per_second": 21.915,
8
+ "train_steps_per_second": 1.37
9
  }
trainer_state.json CHANGED
@@ -332,9 +332,9 @@
332
  {
333
  "epoch": 0.6182380216383307,
334
  "eval_loss": 0.9577658176422119,
335
- "eval_runtime": 11.2148,
336
- "eval_samples_per_second": 51.271,
337
- "eval_steps_per_second": 12.84,
338
  "num_input_tokens_seen": 272576,
339
  "step": 200
340
  },
@@ -661,9 +661,9 @@
661
  {
662
  "epoch": 1.2349304482225656,
663
  "eval_loss": 0.7184381484985352,
664
- "eval_runtime": 11.2254,
665
- "eval_samples_per_second": 51.223,
666
- "eval_steps_per_second": 12.828,
667
  "num_input_tokens_seen": 544096,
668
  "step": 400
669
  },
@@ -990,9 +990,9 @@
990
  {
991
  "epoch": 1.8531684698608966,
992
  "eval_loss": 0.6815493106842041,
993
- "eval_runtime": 11.2454,
994
- "eval_samples_per_second": 51.132,
995
- "eval_steps_per_second": 12.805,
996
  "num_input_tokens_seen": 818048,
997
  "step": 600
998
  },
@@ -1319,9 +1319,9 @@
1319
  {
1320
  "epoch": 2.469860896445131,
1321
  "eval_loss": 0.6753404140472412,
1322
- "eval_runtime": 11.2293,
1323
- "eval_samples_per_second": 51.205,
1324
- "eval_steps_per_second": 12.824,
1325
  "num_input_tokens_seen": 1089600,
1326
  "step": 800
1327
  },
@@ -1648,9 +1648,9 @@
1648
  {
1649
  "epoch": 3.0865533230293662,
1650
  "eval_loss": 0.6587666273117065,
1651
- "eval_runtime": 11.228,
1652
- "eval_samples_per_second": 51.211,
1653
- "eval_steps_per_second": 12.825,
1654
  "num_input_tokens_seen": 1361504,
1655
  "step": 1000
1656
  },
@@ -1977,9 +1977,9 @@
1977
  {
1978
  "epoch": 3.704791344667697,
1979
  "eval_loss": 0.6507958173751831,
1980
- "eval_runtime": 11.2113,
1981
- "eval_samples_per_second": 51.288,
1982
- "eval_steps_per_second": 12.844,
1983
  "num_input_tokens_seen": 1636960,
1984
  "step": 1200
1985
  },
@@ -2306,9 +2306,9 @@
2306
  {
2307
  "epoch": 4.321483771251932,
2308
  "eval_loss": 0.6580312252044678,
2309
- "eval_runtime": 11.2184,
2310
- "eval_samples_per_second": 51.255,
2311
- "eval_steps_per_second": 12.836,
2312
  "num_input_tokens_seen": 1909696,
2313
  "step": 1400
2314
  },
@@ -2635,9 +2635,9 @@
2635
  {
2636
  "epoch": 4.939721792890262,
2637
  "eval_loss": 0.6381492614746094,
2638
- "eval_runtime": 11.2079,
2639
- "eval_samples_per_second": 51.303,
2640
- "eval_steps_per_second": 12.848,
2641
  "num_input_tokens_seen": 2182656,
2642
  "step": 1600
2643
  },
@@ -2964,9 +2964,9 @@
2964
  {
2965
  "epoch": 5.556414219474497,
2966
  "eval_loss": 0.6330167055130005,
2967
- "eval_runtime": 11.3399,
2968
- "eval_samples_per_second": 50.706,
2969
- "eval_steps_per_second": 12.699,
2970
  "num_input_tokens_seen": 2453904,
2971
  "step": 1800
2972
  },
@@ -3293,9 +3293,9 @@
3293
  {
3294
  "epoch": 6.1731066460587325,
3295
  "eval_loss": 0.6232376098632812,
3296
- "eval_runtime": 11.2806,
3297
- "eval_samples_per_second": 50.972,
3298
- "eval_steps_per_second": 12.765,
3299
  "num_input_tokens_seen": 2727984,
3300
  "step": 2000
3301
  },
@@ -3622,9 +3622,9 @@
3622
  {
3623
  "epoch": 6.7913446676970635,
3624
  "eval_loss": 0.6167892813682556,
3625
- "eval_runtime": 11.2823,
3626
- "eval_samples_per_second": 50.965,
3627
- "eval_steps_per_second": 12.763,
3628
  "num_input_tokens_seen": 2999760,
3629
  "step": 2200
3630
  },
@@ -3951,9 +3951,9 @@
3951
  {
3952
  "epoch": 7.4080370942812985,
3953
  "eval_loss": 0.5621501207351685,
3954
- "eval_runtime": 11.2613,
3955
- "eval_samples_per_second": 51.06,
3956
- "eval_steps_per_second": 12.787,
3957
  "num_input_tokens_seen": 3274528,
3958
  "step": 2400
3959
  },
@@ -4280,9 +4280,9 @@
4280
  {
4281
  "epoch": 8.024729520865533,
4282
  "eval_loss": 0.5813793540000916,
4283
- "eval_runtime": 11.2929,
4284
- "eval_samples_per_second": 50.917,
4285
- "eval_steps_per_second": 12.751,
4286
  "num_input_tokens_seen": 3546880,
4287
  "step": 2600
4288
  },
@@ -4609,9 +4609,9 @@
4609
  {
4610
  "epoch": 8.642967542503865,
4611
  "eval_loss": 0.5915025472640991,
4612
- "eval_runtime": 11.2898,
4613
- "eval_samples_per_second": 50.931,
4614
- "eval_steps_per_second": 12.755,
4615
  "num_input_tokens_seen": 3821184,
4616
  "step": 2800
4617
  },
@@ -4938,9 +4938,9 @@
4938
  {
4939
  "epoch": 9.2596599690881,
4940
  "eval_loss": 0.5584082007408142,
4941
- "eval_runtime": 11.2915,
4942
- "eval_samples_per_second": 50.923,
4943
- "eval_steps_per_second": 12.753,
4944
  "num_input_tokens_seen": 4090704,
4945
  "step": 3000
4946
  },
@@ -5267,9 +5267,9 @@
5267
  {
5268
  "epoch": 9.87789799072643,
5269
  "eval_loss": 0.562062680721283,
5270
- "eval_runtime": 11.3185,
5271
- "eval_samples_per_second": 50.802,
5272
- "eval_steps_per_second": 12.723,
5273
  "num_input_tokens_seen": 4363696,
5274
  "step": 3200
5275
  },
@@ -5596,9 +5596,9 @@
5596
  {
5597
  "epoch": 10.494590417310665,
5598
  "eval_loss": 0.5493518114089966,
5599
- "eval_runtime": 11.2869,
5600
- "eval_samples_per_second": 50.944,
5601
- "eval_steps_per_second": 12.758,
5602
  "num_input_tokens_seen": 4636656,
5603
  "step": 3400
5604
  },
@@ -5925,9 +5925,9 @@
5925
  {
5926
  "epoch": 11.1112828438949,
5927
  "eval_loss": 0.5832644701004028,
5928
- "eval_runtime": 11.3381,
5929
- "eval_samples_per_second": 50.714,
5930
- "eval_steps_per_second": 12.7,
5931
  "num_input_tokens_seen": 4908928,
5932
  "step": 3600
5933
  },
@@ -6254,9 +6254,9 @@
6254
  {
6255
  "epoch": 11.72952086553323,
6256
  "eval_loss": 0.5668447017669678,
6257
- "eval_runtime": 11.3233,
6258
- "eval_samples_per_second": 50.78,
6259
- "eval_steps_per_second": 12.717,
6260
  "num_input_tokens_seen": 5179040,
6261
  "step": 3800
6262
  },
@@ -6583,9 +6583,9 @@
6583
  {
6584
  "epoch": 12.346213292117465,
6585
  "eval_loss": 0.5749086737632751,
6586
- "eval_runtime": 11.3249,
6587
- "eval_samples_per_second": 50.773,
6588
- "eval_steps_per_second": 12.715,
6589
  "num_input_tokens_seen": 5452192,
6590
  "step": 4000
6591
  },
@@ -6912,9 +6912,9 @@
6912
  {
6913
  "epoch": 12.964451313755795,
6914
  "eval_loss": 0.564673900604248,
6915
- "eval_runtime": 11.3159,
6916
- "eval_samples_per_second": 50.813,
6917
- "eval_steps_per_second": 12.725,
6918
  "num_input_tokens_seen": 5724448,
6919
  "step": 4200
6920
  },
@@ -7241,9 +7241,9 @@
7241
  {
7242
  "epoch": 13.58114374034003,
7243
  "eval_loss": 0.557171642780304,
7244
- "eval_runtime": 11.3161,
7245
- "eval_samples_per_second": 50.813,
7246
- "eval_steps_per_second": 12.725,
7247
  "num_input_tokens_seen": 5998032,
7248
  "step": 4400
7249
  },
@@ -7570,9 +7570,9 @@
7570
  {
7571
  "epoch": 14.197836166924265,
7572
  "eval_loss": 0.5686624050140381,
7573
- "eval_runtime": 11.3282,
7574
- "eval_samples_per_second": 50.758,
7575
- "eval_steps_per_second": 12.712,
7576
  "num_input_tokens_seen": 6269792,
7577
  "step": 4600
7578
  },
@@ -7899,9 +7899,9 @@
7899
  {
7900
  "epoch": 14.816074188562597,
7901
  "eval_loss": 0.5626024603843689,
7902
- "eval_runtime": 11.3545,
7903
- "eval_samples_per_second": 50.641,
7904
- "eval_steps_per_second": 12.682,
7905
  "num_input_tokens_seen": 6541248,
7906
  "step": 4800
7907
  },
@@ -8228,9 +8228,9 @@
8228
  {
8229
  "epoch": 15.432766615146832,
8230
  "eval_loss": 0.5851988196372986,
8231
- "eval_runtime": 11.3221,
8232
- "eval_samples_per_second": 50.786,
8233
- "eval_steps_per_second": 12.719,
8234
  "num_input_tokens_seen": 6815200,
8235
  "step": 5000
8236
  },
@@ -8557,9 +8557,9 @@
8557
  {
8558
  "epoch": 16.049459041731065,
8559
  "eval_loss": 0.6189093589782715,
8560
- "eval_runtime": 11.3258,
8561
- "eval_samples_per_second": 50.769,
8562
- "eval_steps_per_second": 12.714,
8563
  "num_input_tokens_seen": 7086224,
8564
  "step": 5200
8565
  },
@@ -8886,9 +8886,9 @@
8886
  {
8887
  "epoch": 16.667697063369395,
8888
  "eval_loss": 0.6122633814811707,
8889
- "eval_runtime": 11.3336,
8890
- "eval_samples_per_second": 50.734,
8891
- "eval_steps_per_second": 12.706,
8892
  "num_input_tokens_seen": 7360560,
8893
  "step": 5400
8894
  },
@@ -9215,9 +9215,9 @@
9215
  {
9216
  "epoch": 17.284389489953632,
9217
  "eval_loss": 0.611182689666748,
9218
- "eval_runtime": 11.3271,
9219
- "eval_samples_per_second": 50.763,
9220
- "eval_steps_per_second": 12.713,
9221
  "num_input_tokens_seen": 7632240,
9222
  "step": 5600
9223
  },
@@ -9544,9 +9544,9 @@
9544
  {
9545
  "epoch": 17.902627511591962,
9546
  "eval_loss": 0.5843232274055481,
9547
- "eval_runtime": 11.3479,
9548
- "eval_samples_per_second": 50.67,
9549
- "eval_steps_per_second": 12.69,
9550
  "num_input_tokens_seen": 7904432,
9551
  "step": 5800
9552
  },
@@ -9873,9 +9873,9 @@
9873
  {
9874
  "epoch": 18.5193199381762,
9875
  "eval_loss": 0.6198561191558838,
9876
- "eval_runtime": 11.325,
9877
- "eval_samples_per_second": 50.773,
9878
- "eval_steps_per_second": 12.715,
9879
  "num_input_tokens_seen": 8177168,
9880
  "step": 6000
9881
  },
@@ -10202,9 +10202,9 @@
10202
  {
10203
  "epoch": 19.136012364760433,
10204
  "eval_loss": 0.6794010996818542,
10205
- "eval_runtime": 11.3249,
10206
- "eval_samples_per_second": 50.773,
10207
- "eval_steps_per_second": 12.715,
10208
  "num_input_tokens_seen": 8449968,
10209
  "step": 6200
10210
  },
@@ -10531,9 +10531,9 @@
10531
  {
10532
  "epoch": 19.754250386398763,
10533
  "eval_loss": 0.6374606490135193,
10534
- "eval_runtime": 11.3306,
10535
- "eval_samples_per_second": 50.748,
10536
- "eval_steps_per_second": 12.709,
10537
  "num_input_tokens_seen": 8722992,
10538
  "step": 6400
10539
  },
@@ -10860,9 +10860,9 @@
10860
  {
10861
  "epoch": 20.370942812983,
10862
  "eval_loss": 0.6705669164657593,
10863
- "eval_runtime": 11.3299,
10864
- "eval_samples_per_second": 50.751,
10865
- "eval_steps_per_second": 12.71,
10866
  "num_input_tokens_seen": 8996224,
10867
  "step": 6600
10868
  },
@@ -11189,9 +11189,9 @@
11189
  {
11190
  "epoch": 20.98918083462133,
11191
  "eval_loss": 0.648054838180542,
11192
- "eval_runtime": 11.3243,
11193
- "eval_samples_per_second": 50.776,
11194
- "eval_steps_per_second": 12.716,
11195
  "num_input_tokens_seen": 9269504,
11196
  "step": 6800
11197
  },
@@ -11518,9 +11518,9 @@
11518
  {
11519
  "epoch": 21.605873261205563,
11520
  "eval_loss": 0.7299332618713379,
11521
- "eval_runtime": 11.3174,
11522
- "eval_samples_per_second": 50.807,
11523
- "eval_steps_per_second": 12.724,
11524
  "num_input_tokens_seen": 9542432,
11525
  "step": 7000
11526
  },
@@ -11847,9 +11847,9 @@
11847
  {
11848
  "epoch": 22.2225656877898,
11849
  "eval_loss": 0.7840644717216492,
11850
- "eval_runtime": 11.3356,
11851
- "eval_samples_per_second": 50.725,
11852
- "eval_steps_per_second": 12.703,
11853
  "num_input_tokens_seen": 9812704,
11854
  "step": 7200
11855
  },
@@ -12176,9 +12176,9 @@
12176
  {
12177
  "epoch": 22.84080370942813,
12178
  "eval_loss": 0.7381678819656372,
12179
- "eval_runtime": 11.3316,
12180
- "eval_samples_per_second": 50.743,
12181
- "eval_steps_per_second": 12.708,
12182
  "num_input_tokens_seen": 10086272,
12183
  "step": 7400
12184
  },
@@ -12505,9 +12505,9 @@
12505
  {
12506
  "epoch": 23.457496136012363,
12507
  "eval_loss": 0.7728149890899658,
12508
- "eval_runtime": 11.3287,
12509
- "eval_samples_per_second": 50.756,
12510
- "eval_steps_per_second": 12.711,
12511
  "num_input_tokens_seen": 10358832,
12512
  "step": 7600
12513
  },
@@ -12834,9 +12834,9 @@
12834
  {
12835
  "epoch": 24.0741885625966,
12836
  "eval_loss": 0.8268849849700928,
12837
- "eval_runtime": 11.5298,
12838
- "eval_samples_per_second": 49.871,
12839
- "eval_steps_per_second": 12.489,
12840
  "num_input_tokens_seen": 10630000,
12841
  "step": 7800
12842
  },
@@ -13163,9 +13163,9 @@
13163
  {
13164
  "epoch": 24.69242658423493,
13165
  "eval_loss": 0.8175145983695984,
13166
- "eval_runtime": 11.3248,
13167
- "eval_samples_per_second": 50.774,
13168
- "eval_steps_per_second": 12.715,
13169
  "num_input_tokens_seen": 10904880,
13170
  "step": 8000
13171
  },
@@ -13492,9 +13492,9 @@
13492
  {
13493
  "epoch": 25.309119010819167,
13494
  "eval_loss": 0.8719689249992371,
13495
- "eval_runtime": 11.3173,
13496
- "eval_samples_per_second": 50.807,
13497
- "eval_steps_per_second": 12.724,
13498
  "num_input_tokens_seen": 11176208,
13499
  "step": 8200
13500
  },
@@ -13821,9 +13821,9 @@
13821
  {
13822
  "epoch": 25.927357032457497,
13823
  "eval_loss": 0.9041878581047058,
13824
- "eval_runtime": 11.3491,
13825
- "eval_samples_per_second": 50.665,
13826
- "eval_steps_per_second": 12.688,
13827
  "num_input_tokens_seen": 11451344,
13828
  "step": 8400
13829
  },
@@ -14150,9 +14150,9 @@
14150
  {
14151
  "epoch": 26.54404945904173,
14152
  "eval_loss": 0.8620166778564453,
14153
- "eval_runtime": 11.3331,
14154
- "eval_samples_per_second": 50.736,
14155
- "eval_steps_per_second": 12.706,
14156
  "num_input_tokens_seen": 11723328,
14157
  "step": 8600
14158
  },
@@ -14479,9 +14479,9 @@
14479
  {
14480
  "epoch": 27.160741885625967,
14481
  "eval_loss": 0.9756768345832825,
14482
- "eval_runtime": 11.3464,
14483
- "eval_samples_per_second": 50.677,
14484
- "eval_steps_per_second": 12.691,
14485
  "num_input_tokens_seen": 11996224,
14486
  "step": 8800
14487
  },
@@ -14808,9 +14808,9 @@
14808
  {
14809
  "epoch": 27.778979907264297,
14810
  "eval_loss": 0.9385554194450378,
14811
- "eval_runtime": 11.3441,
14812
- "eval_samples_per_second": 50.687,
14813
- "eval_steps_per_second": 12.694,
14814
  "num_input_tokens_seen": 12267520,
14815
  "step": 9000
14816
  },
@@ -15137,9 +15137,9 @@
15137
  {
15138
  "epoch": 28.39567233384853,
15139
  "eval_loss": 0.9237757921218872,
15140
- "eval_runtime": 11.3148,
15141
- "eval_samples_per_second": 50.819,
15142
- "eval_steps_per_second": 12.727,
15143
  "num_input_tokens_seen": 12542064,
15144
  "step": 9200
15145
  },
@@ -15466,9 +15466,9 @@
15466
  {
15467
  "epoch": 29.012364760432767,
15468
  "eval_loss": 1.064571499824524,
15469
- "eval_runtime": 11.3576,
15470
- "eval_samples_per_second": 50.627,
15471
- "eval_steps_per_second": 12.679,
15472
  "num_input_tokens_seen": 12812048,
15473
  "step": 9400
15474
  },
@@ -15795,9 +15795,9 @@
15795
  {
15796
  "epoch": 29.630602782071097,
15797
  "eval_loss": 1.0749653577804565,
15798
- "eval_runtime": 11.3204,
15799
- "eval_samples_per_second": 50.793,
15800
- "eval_steps_per_second": 12.72,
15801
  "num_input_tokens_seen": 13085264,
15802
  "step": 9600
15803
  },
@@ -16124,9 +16124,9 @@
16124
  {
16125
  "epoch": 30.24729520865533,
16126
  "eval_loss": 1.0077648162841797,
16127
- "eval_runtime": 11.3544,
16128
- "eval_samples_per_second": 50.641,
16129
- "eval_steps_per_second": 12.682,
16130
  "num_input_tokens_seen": 13356384,
16131
  "step": 9800
16132
  },
@@ -16453,9 +16453,9 @@
16453
  {
16454
  "epoch": 30.865533230293664,
16455
  "eval_loss": 1.057982325553894,
16456
- "eval_runtime": 11.3281,
16457
- "eval_samples_per_second": 50.759,
16458
- "eval_steps_per_second": 12.712,
16459
  "num_input_tokens_seen": 13629216,
16460
  "step": 10000
16461
  },
@@ -16782,9 +16782,9 @@
16782
  {
16783
  "epoch": 31.482225656877898,
16784
  "eval_loss": 1.0450738668441772,
16785
- "eval_runtime": 11.3237,
16786
- "eval_samples_per_second": 50.779,
16787
- "eval_steps_per_second": 12.717,
16788
  "num_input_tokens_seen": 13902736,
16789
  "step": 10200
16790
  },
@@ -17111,9 +17111,9 @@
17111
  {
17112
  "epoch": 32.09891808346213,
17113
  "eval_loss": 1.0477303266525269,
17114
- "eval_runtime": 11.3224,
17115
- "eval_samples_per_second": 50.784,
17116
- "eval_steps_per_second": 12.718,
17117
  "num_input_tokens_seen": 14174192,
17118
  "step": 10400
17119
  },
@@ -17440,9 +17440,9 @@
17440
  {
17441
  "epoch": 32.717156105100464,
17442
  "eval_loss": 1.14347243309021,
17443
- "eval_runtime": 11.3251,
17444
- "eval_samples_per_second": 50.772,
17445
- "eval_steps_per_second": 12.715,
17446
  "num_input_tokens_seen": 14448176,
17447
  "step": 10600
17448
  },
@@ -17769,9 +17769,9 @@
17769
  {
17770
  "epoch": 33.3338485316847,
17771
  "eval_loss": 1.0730254650115967,
17772
- "eval_runtime": 11.3372,
17773
- "eval_samples_per_second": 50.718,
17774
- "eval_steps_per_second": 12.702,
17775
  "num_input_tokens_seen": 14718096,
17776
  "step": 10800
17777
  },
@@ -18098,9 +18098,9 @@
18098
  {
18099
  "epoch": 33.95208655332303,
18100
  "eval_loss": 1.0351147651672363,
18101
- "eval_runtime": 11.3273,
18102
- "eval_samples_per_second": 50.762,
18103
- "eval_steps_per_second": 12.713,
18104
  "num_input_tokens_seen": 14992048,
18105
  "step": 11000
18106
  },
@@ -18427,9 +18427,9 @@
18427
  {
18428
  "epoch": 34.568778979907265,
18429
  "eval_loss": 1.1394553184509277,
18430
- "eval_runtime": 11.3296,
18431
- "eval_samples_per_second": 50.752,
18432
- "eval_steps_per_second": 12.71,
18433
  "num_input_tokens_seen": 15265072,
18434
  "step": 11200
18435
  },
@@ -18756,9 +18756,9 @@
18756
  {
18757
  "epoch": 35.1854714064915,
18758
  "eval_loss": 1.1201566457748413,
18759
- "eval_runtime": 11.327,
18760
- "eval_samples_per_second": 50.764,
18761
- "eval_steps_per_second": 12.713,
18762
  "num_input_tokens_seen": 15538960,
18763
  "step": 11400
18764
  },
@@ -19085,9 +19085,9 @@
19085
  {
19086
  "epoch": 35.80370942812983,
19087
  "eval_loss": 1.133685827255249,
19088
- "eval_runtime": 11.3415,
19089
- "eval_samples_per_second": 50.699,
19090
- "eval_steps_per_second": 12.697,
19091
  "num_input_tokens_seen": 15812880,
19092
  "step": 11600
19093
  },
@@ -19414,9 +19414,9 @@
19414
  {
19415
  "epoch": 36.420401854714065,
19416
  "eval_loss": 1.1776589155197144,
19417
- "eval_runtime": 11.336,
19418
- "eval_samples_per_second": 50.723,
19419
- "eval_steps_per_second": 12.703,
19420
  "num_input_tokens_seen": 16082608,
19421
  "step": 11800
19422
  },
@@ -19743,9 +19743,9 @@
19743
  {
19744
  "epoch": 37.0370942812983,
19745
  "eval_loss": 1.18972909450531,
19746
- "eval_runtime": 11.3254,
19747
- "eval_samples_per_second": 50.771,
19748
- "eval_steps_per_second": 12.715,
19749
  "num_input_tokens_seen": 16357888,
19750
  "step": 12000
19751
  },
@@ -20072,9 +20072,9 @@
20072
  {
20073
  "epoch": 37.65533230293663,
20074
  "eval_loss": 1.2221449613571167,
20075
- "eval_runtime": 11.351,
20076
- "eval_samples_per_second": 50.656,
20077
- "eval_steps_per_second": 12.686,
20078
  "num_input_tokens_seen": 16627872,
20079
  "step": 12200
20080
  },
@@ -20401,9 +20401,9 @@
20401
  {
20402
  "epoch": 38.272024729520865,
20403
  "eval_loss": 1.169758677482605,
20404
- "eval_runtime": 11.3289,
20405
- "eval_samples_per_second": 50.755,
20406
- "eval_steps_per_second": 12.711,
20407
  "num_input_tokens_seen": 16900336,
20408
  "step": 12400
20409
  },
@@ -20730,9 +20730,9 @@
20730
  {
20731
  "epoch": 38.8902627511592,
20732
  "eval_loss": 1.1674479246139526,
20733
- "eval_runtime": 11.3543,
20734
- "eval_samples_per_second": 50.642,
20735
- "eval_steps_per_second": 12.682,
20736
  "num_input_tokens_seen": 17175024,
20737
  "step": 12600
20738
  },
@@ -21059,9 +21059,9 @@
21059
  {
21060
  "epoch": 39.50695517774343,
21061
  "eval_loss": 1.1664071083068848,
21062
- "eval_runtime": 11.3286,
21063
- "eval_samples_per_second": 50.757,
21064
- "eval_steps_per_second": 12.711,
21065
  "num_input_tokens_seen": 17446864,
21066
  "step": 12800
21067
  },
@@ -21388,9 +21388,9 @@
21388
  {
21389
  "epoch": 40.123647604327665,
21390
  "eval_loss": 1.2493196725845337,
21391
- "eval_runtime": 11.3259,
21392
- "eval_samples_per_second": 50.769,
21393
- "eval_steps_per_second": 12.714,
21394
  "num_input_tokens_seen": 17716560,
21395
  "step": 13000
21396
  },
@@ -21717,9 +21717,9 @@
21717
  {
21718
  "epoch": 40.741885625966,
21719
  "eval_loss": 1.3186978101730347,
21720
- "eval_runtime": 11.3339,
21721
- "eval_samples_per_second": 50.733,
21722
- "eval_steps_per_second": 12.705,
21723
  "num_input_tokens_seen": 17991792,
21724
  "step": 13200
21725
  },
@@ -22046,9 +22046,9 @@
22046
  {
22047
  "epoch": 41.35857805255023,
22048
  "eval_loss": 1.256793737411499,
22049
- "eval_runtime": 11.3245,
22050
- "eval_samples_per_second": 50.775,
22051
- "eval_steps_per_second": 12.716,
22052
  "num_input_tokens_seen": 18262992,
22053
  "step": 13400
22054
  },
@@ -22375,9 +22375,9 @@
22375
  {
22376
  "epoch": 41.97681607418856,
22377
  "eval_loss": 1.2447845935821533,
22378
- "eval_runtime": 11.3516,
22379
- "eval_samples_per_second": 50.654,
22380
- "eval_steps_per_second": 12.685,
22381
  "num_input_tokens_seen": 18536880,
22382
  "step": 13600
22383
  },
@@ -22704,9 +22704,9 @@
22704
  {
22705
  "epoch": 42.5935085007728,
22706
  "eval_loss": 1.233655333518982,
22707
- "eval_runtime": 11.3337,
22708
- "eval_samples_per_second": 50.734,
22709
- "eval_steps_per_second": 12.705,
22710
  "num_input_tokens_seen": 18806784,
22711
  "step": 13800
22712
  },
@@ -23033,9 +23033,9 @@
23033
  {
23034
  "epoch": 43.210200927357036,
23035
  "eval_loss": 1.254447102546692,
23036
- "eval_runtime": 11.3281,
23037
- "eval_samples_per_second": 50.759,
23038
- "eval_steps_per_second": 12.712,
23039
  "num_input_tokens_seen": 19080608,
23040
  "step": 14000
23041
  },
@@ -23362,9 +23362,9 @@
23362
  {
23363
  "epoch": 43.82843894899536,
23364
  "eval_loss": 1.3475619554519653,
23365
- "eval_runtime": 11.3651,
23366
- "eval_samples_per_second": 50.593,
23367
- "eval_steps_per_second": 12.67,
23368
  "num_input_tokens_seen": 19352320,
23369
  "step": 14200
23370
  },
@@ -23691,9 +23691,9 @@
23691
  {
23692
  "epoch": 44.4451313755796,
23693
  "eval_loss": 1.2956358194351196,
23694
- "eval_runtime": 11.3316,
23695
- "eval_samples_per_second": 50.743,
23696
- "eval_steps_per_second": 12.708,
23697
  "num_input_tokens_seen": 19624544,
23698
  "step": 14400
23699
  },
@@ -24020,9 +24020,9 @@
24020
  {
24021
  "epoch": 45.061823802163836,
24022
  "eval_loss": 1.214294195175171,
24023
- "eval_runtime": 11.3366,
24024
- "eval_samples_per_second": 50.721,
24025
- "eval_steps_per_second": 12.702,
24026
  "num_input_tokens_seen": 19896064,
24027
  "step": 14600
24028
  },
@@ -24349,9 +24349,9 @@
24349
  {
24350
  "epoch": 45.68006182380216,
24351
  "eval_loss": 1.200486660003662,
24352
- "eval_runtime": 11.3392,
24353
- "eval_samples_per_second": 50.709,
24354
- "eval_steps_per_second": 12.699,
24355
  "num_input_tokens_seen": 20168064,
24356
  "step": 14800
24357
  },
@@ -24678,9 +24678,9 @@
24678
  {
24679
  "epoch": 46.2967542503864,
24680
  "eval_loss": 1.3230748176574707,
24681
- "eval_runtime": 11.3422,
24682
- "eval_samples_per_second": 50.696,
24683
- "eval_steps_per_second": 12.696,
24684
  "num_input_tokens_seen": 20440208,
24685
  "step": 15000
24686
  },
@@ -25007,9 +25007,9 @@
25007
  {
25008
  "epoch": 46.914992272024726,
25009
  "eval_loss": 1.2638696432113647,
25010
- "eval_runtime": 11.3504,
25011
- "eval_samples_per_second": 50.659,
25012
- "eval_steps_per_second": 12.687,
25013
  "num_input_tokens_seen": 20713296,
25014
  "step": 15200
25015
  },
@@ -25336,9 +25336,9 @@
25336
  {
25337
  "epoch": 47.53168469860896,
25338
  "eval_loss": 1.3379755020141602,
25339
- "eval_runtime": 11.3332,
25340
- "eval_samples_per_second": 50.736,
25341
- "eval_steps_per_second": 12.706,
25342
  "num_input_tokens_seen": 20985744,
25343
  "step": 15400
25344
  },
@@ -25665,9 +25665,9 @@
25665
  {
25666
  "epoch": 48.1483771251932,
25667
  "eval_loss": 1.2503776550292969,
25668
- "eval_runtime": 11.3453,
25669
- "eval_samples_per_second": 50.682,
25670
- "eval_steps_per_second": 12.692,
25671
  "num_input_tokens_seen": 21257920,
25672
  "step": 15600
25673
  },
@@ -25994,9 +25994,9 @@
25994
  {
25995
  "epoch": 48.76661514683153,
25996
  "eval_loss": 1.2862586975097656,
25997
- "eval_runtime": 11.3455,
25998
- "eval_samples_per_second": 50.681,
25999
- "eval_steps_per_second": 12.692,
26000
  "num_input_tokens_seen": 21529248,
26001
  "step": 15800
26002
  },
@@ -26323,9 +26323,9 @@
26323
  {
26324
  "epoch": 49.38330757341576,
26325
  "eval_loss": 1.312309741973877,
26326
- "eval_runtime": 11.34,
26327
- "eval_samples_per_second": 50.706,
26328
- "eval_steps_per_second": 12.698,
26329
  "num_input_tokens_seen": 21800992,
26330
  "step": 16000
26331
  },
@@ -26652,9 +26652,9 @@
26652
  {
26653
  "epoch": 50.0,
26654
  "eval_loss": 1.2966762781143188,
26655
- "eval_runtime": 11.3207,
26656
- "eval_samples_per_second": 50.792,
26657
- "eval_steps_per_second": 12.72,
26658
  "num_input_tokens_seen": 22073392,
26659
  "step": 16200
26660
  },
@@ -26981,9 +26981,9 @@
26981
  {
26982
  "epoch": 50.618238021638334,
26983
  "eval_loss": 1.3632538318634033,
26984
- "eval_runtime": 11.3325,
26985
- "eval_samples_per_second": 50.739,
26986
- "eval_steps_per_second": 12.707,
26987
  "num_input_tokens_seen": 22345648,
26988
  "step": 16400
26989
  },
@@ -27310,9 +27310,9 @@
27310
  {
27311
  "epoch": 51.23493044822256,
27312
  "eval_loss": 1.3670175075531006,
27313
- "eval_runtime": 11.3332,
27314
- "eval_samples_per_second": 50.736,
27315
- "eval_steps_per_second": 12.706,
27316
  "num_input_tokens_seen": 22617984,
27317
  "step": 16600
27318
  },
@@ -27639,9 +27639,9 @@
27639
  {
27640
  "epoch": 51.8531684698609,
27641
  "eval_loss": 1.3320527076721191,
27642
- "eval_runtime": 11.3513,
27643
- "eval_samples_per_second": 50.655,
27644
- "eval_steps_per_second": 12.686,
27645
  "num_input_tokens_seen": 22892544,
27646
  "step": 16800
27647
  },
@@ -27968,9 +27968,9 @@
27968
  {
27969
  "epoch": 52.469860896445134,
27970
  "eval_loss": 1.430206537246704,
27971
- "eval_runtime": 11.3356,
27972
- "eval_samples_per_second": 50.725,
27973
- "eval_steps_per_second": 12.703,
27974
  "num_input_tokens_seen": 23163488,
27975
  "step": 17000
27976
  },
@@ -28297,9 +28297,9 @@
28297
  {
28298
  "epoch": 53.086553323029364,
28299
  "eval_loss": 1.321289300918579,
28300
- "eval_runtime": 11.3247,
28301
- "eval_samples_per_second": 50.774,
28302
- "eval_steps_per_second": 12.716,
28303
  "num_input_tokens_seen": 23438320,
28304
  "step": 17200
28305
  },
@@ -28626,9 +28626,9 @@
28626
  {
28627
  "epoch": 53.7047913446677,
28628
  "eval_loss": 1.4570552110671997,
28629
- "eval_runtime": 11.3234,
28630
- "eval_samples_per_second": 50.78,
28631
- "eval_steps_per_second": 12.717,
28632
  "num_input_tokens_seen": 23708720,
28633
  "step": 17400
28634
  },
@@ -28955,9 +28955,9 @@
28955
  {
28956
  "epoch": 54.321483771251934,
28957
  "eval_loss": 1.3873727321624756,
28958
- "eval_runtime": 11.3556,
28959
- "eval_samples_per_second": 50.636,
28960
- "eval_steps_per_second": 12.681,
28961
  "num_input_tokens_seen": 23984304,
28962
  "step": 17600
28963
  },
@@ -29284,9 +29284,9 @@
29284
  {
29285
  "epoch": 54.93972179289026,
29286
  "eval_loss": 1.4202662706375122,
29287
- "eval_runtime": 11.3507,
29288
- "eval_samples_per_second": 50.658,
29289
- "eval_steps_per_second": 12.686,
29290
  "num_input_tokens_seen": 24256368,
29291
  "step": 17800
29292
  },
@@ -29613,9 +29613,9 @@
29613
  {
29614
  "epoch": 55.5564142194745,
29615
  "eval_loss": 1.3830780982971191,
29616
- "eval_runtime": 11.3461,
29617
- "eval_samples_per_second": 50.678,
29618
- "eval_steps_per_second": 12.692,
29619
  "num_input_tokens_seen": 24527040,
29620
  "step": 18000
29621
  },
@@ -29942,9 +29942,9 @@
29942
  {
29943
  "epoch": 56.173106646058734,
29944
  "eval_loss": 1.4858934879302979,
29945
- "eval_runtime": 11.3373,
29946
- "eval_samples_per_second": 50.717,
29947
- "eval_steps_per_second": 12.701,
29948
  "num_input_tokens_seen": 24799312,
29949
  "step": 18200
29950
  },
@@ -30271,9 +30271,9 @@
30271
  {
30272
  "epoch": 56.79134466769706,
30273
  "eval_loss": 1.5054408311843872,
30274
- "eval_runtime": 11.343,
30275
- "eval_samples_per_second": 50.692,
30276
- "eval_steps_per_second": 12.695,
30277
  "num_input_tokens_seen": 25072848,
30278
  "step": 18400
30279
  },
@@ -30600,9 +30600,9 @@
30600
  {
30601
  "epoch": 57.4080370942813,
30602
  "eval_loss": 1.473268747329712,
30603
- "eval_runtime": 11.3334,
30604
- "eval_samples_per_second": 50.735,
30605
- "eval_steps_per_second": 12.706,
30606
  "num_input_tokens_seen": 25347056,
30607
  "step": 18600
30608
  },
@@ -30929,9 +30929,9 @@
30929
  {
30930
  "epoch": 58.024729520865534,
30931
  "eval_loss": 1.5095571279525757,
30932
- "eval_runtime": 11.3198,
30933
- "eval_samples_per_second": 50.796,
30934
- "eval_steps_per_second": 12.721,
30935
  "num_input_tokens_seen": 25618400,
30936
  "step": 18800
30937
  },
@@ -31258,9 +31258,9 @@
31258
  {
31259
  "epoch": 58.64296754250386,
31260
  "eval_loss": 1.3225481510162354,
31261
- "eval_runtime": 11.3509,
31262
- "eval_samples_per_second": 50.657,
31263
- "eval_steps_per_second": 12.686,
31264
  "num_input_tokens_seen": 25892960,
31265
  "step": 19000
31266
  },
@@ -31587,9 +31587,9 @@
31587
  {
31588
  "epoch": 59.2596599690881,
31589
  "eval_loss": 1.4172106981277466,
31590
- "eval_runtime": 11.3335,
31591
- "eval_samples_per_second": 50.734,
31592
- "eval_steps_per_second": 12.706,
31593
  "num_input_tokens_seen": 26164688,
31594
  "step": 19200
31595
  },
@@ -31916,9 +31916,9 @@
31916
  {
31917
  "epoch": 59.87789799072643,
31918
  "eval_loss": 1.35789155960083,
31919
- "eval_runtime": 11.3267,
31920
- "eval_samples_per_second": 50.765,
31921
- "eval_steps_per_second": 12.713,
31922
  "num_input_tokens_seen": 26437392,
31923
  "step": 19400
31924
  },
@@ -32245,9 +32245,9 @@
32245
  {
32246
  "epoch": 60.49459041731066,
32247
  "eval_loss": 1.406263828277588,
32248
- "eval_runtime": 12.8664,
32249
- "eval_samples_per_second": 44.69,
32250
- "eval_steps_per_second": 11.192,
32251
  "num_input_tokens_seen": 26710176,
32252
  "step": 19600
32253
  },
@@ -32574,9 +32574,9 @@
32574
  {
32575
  "epoch": 61.1112828438949,
32576
  "eval_loss": 1.4265893697738647,
32577
- "eval_runtime": 11.3163,
32578
- "eval_samples_per_second": 50.812,
32579
- "eval_steps_per_second": 12.725,
32580
  "num_input_tokens_seen": 26981728,
32581
  "step": 19800
32582
  },
@@ -32903,9 +32903,9 @@
32903
  {
32904
  "epoch": 61.72952086553323,
32905
  "eval_loss": 1.3551362752914429,
32906
- "eval_runtime": 11.3506,
32907
- "eval_samples_per_second": 50.658,
32908
- "eval_steps_per_second": 12.687,
32909
  "num_input_tokens_seen": 27253632,
32910
  "step": 20000
32911
  },
@@ -33232,9 +33232,9 @@
33232
  {
33233
  "epoch": 62.34621329211747,
33234
  "eval_loss": 1.4743679761886597,
33235
- "eval_runtime": 11.335,
33236
- "eval_samples_per_second": 50.728,
33237
- "eval_steps_per_second": 12.704,
33238
  "num_input_tokens_seen": 27524928,
33239
  "step": 20200
33240
  },
@@ -33561,9 +33561,9 @@
33561
  {
33562
  "epoch": 62.964451313755795,
33563
  "eval_loss": 1.5115978717803955,
33564
- "eval_runtime": 11.3216,
33565
- "eval_samples_per_second": 50.788,
33566
- "eval_steps_per_second": 12.719,
33567
  "num_input_tokens_seen": 27799712,
33568
  "step": 20400
33569
  },
@@ -33890,9 +33890,9 @@
33890
  {
33891
  "epoch": 63.58114374034003,
33892
  "eval_loss": 1.5977118015289307,
33893
- "eval_runtime": 11.3317,
33894
- "eval_samples_per_second": 50.743,
33895
- "eval_steps_per_second": 12.708,
33896
  "num_input_tokens_seen": 28071024,
33897
  "step": 20600
33898
  },
@@ -34219,9 +34219,9 @@
34219
  {
34220
  "epoch": 64.19783616692426,
34221
  "eval_loss": 1.5763089656829834,
34222
- "eval_runtime": 11.3366,
34223
- "eval_samples_per_second": 50.721,
34224
- "eval_steps_per_second": 12.702,
34225
  "num_input_tokens_seen": 28342880,
34226
  "step": 20800
34227
  },
@@ -34548,9 +34548,9 @@
34548
  {
34549
  "epoch": 64.8160741885626,
34550
  "eval_loss": 1.6289054155349731,
34551
- "eval_runtime": 11.3575,
34552
- "eval_samples_per_second": 50.627,
34553
- "eval_steps_per_second": 12.679,
34554
  "num_input_tokens_seen": 28617696,
34555
  "step": 21000
34556
  },
@@ -34877,9 +34877,9 @@
34877
  {
34878
  "epoch": 65.43276661514683,
34879
  "eval_loss": 1.6688075065612793,
34880
- "eval_runtime": 11.3331,
34881
- "eval_samples_per_second": 50.736,
34882
- "eval_steps_per_second": 12.706,
34883
  "num_input_tokens_seen": 28888112,
34884
  "step": 21200
34885
  },
@@ -35206,9 +35206,9 @@
35206
  {
35207
  "epoch": 66.04945904173107,
35208
  "eval_loss": 1.6155662536621094,
35209
- "eval_runtime": 11.3795,
35210
- "eval_samples_per_second": 50.53,
35211
- "eval_steps_per_second": 12.654,
35212
  "num_input_tokens_seen": 29162944,
35213
  "step": 21400
35214
  },
@@ -35535,9 +35535,9 @@
35535
  {
35536
  "epoch": 66.6676970633694,
35537
  "eval_loss": 1.6828913688659668,
35538
- "eval_runtime": 11.3417,
35539
- "eval_samples_per_second": 50.698,
35540
- "eval_steps_per_second": 12.697,
35541
  "num_input_tokens_seen": 29434784,
35542
  "step": 21600
35543
  },
@@ -35864,9 +35864,9 @@
35864
  {
35865
  "epoch": 67.28438948995363,
35866
  "eval_loss": 1.6700409650802612,
35867
- "eval_runtime": 11.3434,
35868
- "eval_samples_per_second": 50.69,
35869
- "eval_steps_per_second": 12.695,
35870
  "num_input_tokens_seen": 29706800,
35871
  "step": 21800
35872
  },
@@ -36193,9 +36193,9 @@
36193
  {
36194
  "epoch": 67.90262751159196,
36195
  "eval_loss": 1.6916232109069824,
36196
- "eval_runtime": 11.3242,
36197
- "eval_samples_per_second": 50.776,
36198
- "eval_steps_per_second": 12.716,
36199
  "num_input_tokens_seen": 29980240,
36200
  "step": 22000
36201
  },
@@ -36522,9 +36522,9 @@
36522
  {
36523
  "epoch": 68.5193199381762,
36524
  "eval_loss": 1.7332632541656494,
36525
- "eval_runtime": 11.3077,
36526
- "eval_samples_per_second": 50.85,
36527
- "eval_steps_per_second": 12.735,
36528
  "num_input_tokens_seen": 30250192,
36529
  "step": 22200
36530
  },
@@ -36851,9 +36851,9 @@
36851
  {
36852
  "epoch": 69.13601236476043,
36853
  "eval_loss": 1.7388529777526855,
36854
- "eval_runtime": 11.34,
36855
- "eval_samples_per_second": 50.705,
36856
- "eval_steps_per_second": 12.698,
36857
  "num_input_tokens_seen": 30522672,
36858
  "step": 22400
36859
  },
@@ -37180,9 +37180,9 @@
37180
  {
37181
  "epoch": 69.75425038639877,
37182
  "eval_loss": 1.7202584743499756,
37183
- "eval_runtime": 11.3257,
37184
- "eval_samples_per_second": 50.769,
37185
- "eval_steps_per_second": 12.714,
37186
  "num_input_tokens_seen": 30795024,
37187
  "step": 22600
37188
  },
@@ -37509,9 +37509,9 @@
37509
  {
37510
  "epoch": 70.370942812983,
37511
  "eval_loss": 1.7700324058532715,
37512
- "eval_runtime": 11.3521,
37513
- "eval_samples_per_second": 50.651,
37514
- "eval_steps_per_second": 12.685,
37515
  "num_input_tokens_seen": 31066544,
37516
  "step": 22800
37517
  },
@@ -37838,9 +37838,9 @@
37838
  {
37839
  "epoch": 70.98918083462132,
37840
  "eval_loss": 1.769662857055664,
37841
- "eval_runtime": 11.3403,
37842
- "eval_samples_per_second": 50.704,
37843
- "eval_steps_per_second": 12.698,
37844
  "num_input_tokens_seen": 31338128,
37845
  "step": 23000
37846
  },
@@ -38167,9 +38167,9 @@
38167
  {
38168
  "epoch": 71.60587326120556,
38169
  "eval_loss": 1.8099125623703003,
38170
- "eval_runtime": 11.3562,
38171
- "eval_samples_per_second": 50.633,
38172
- "eval_steps_per_second": 12.68,
38173
  "num_input_tokens_seen": 31609104,
38174
  "step": 23200
38175
  },
@@ -38496,9 +38496,9 @@
38496
  {
38497
  "epoch": 72.2225656877898,
38498
  "eval_loss": 1.856191873550415,
38499
- "eval_runtime": 11.3444,
38500
- "eval_samples_per_second": 50.686,
38501
- "eval_steps_per_second": 12.694,
38502
  "num_input_tokens_seen": 31881424,
38503
  "step": 23400
38504
  },
@@ -38825,9 +38825,9 @@
38825
  {
38826
  "epoch": 72.84080370942813,
38827
  "eval_loss": 1.7837176322937012,
38828
- "eval_runtime": 11.3265,
38829
- "eval_samples_per_second": 50.766,
38830
- "eval_steps_per_second": 12.714,
38831
  "num_input_tokens_seen": 32155024,
38832
  "step": 23600
38833
  },
@@ -39154,9 +39154,9 @@
39154
  {
39155
  "epoch": 73.45749613601237,
39156
  "eval_loss": 1.8125648498535156,
39157
- "eval_runtime": 11.321,
39158
- "eval_samples_per_second": 50.79,
39159
- "eval_steps_per_second": 12.72,
39160
  "num_input_tokens_seen": 32425312,
39161
  "step": 23800
39162
  },
@@ -39483,9 +39483,9 @@
39483
  {
39484
  "epoch": 74.0741885625966,
39485
  "eval_loss": 1.8575142621994019,
39486
- "eval_runtime": 11.3337,
39487
- "eval_samples_per_second": 50.734,
39488
- "eval_steps_per_second": 12.706,
39489
  "num_input_tokens_seen": 32698784,
39490
  "step": 24000
39491
  },
@@ -39812,9 +39812,9 @@
39812
  {
39813
  "epoch": 74.69242658423494,
39814
  "eval_loss": 1.8753187656402588,
39815
- "eval_runtime": 11.3403,
39816
- "eval_samples_per_second": 50.704,
39817
- "eval_steps_per_second": 12.698,
39818
  "num_input_tokens_seen": 32974144,
39819
  "step": 24200
39820
  },
@@ -40141,9 +40141,9 @@
40141
  {
40142
  "epoch": 75.30911901081916,
40143
  "eval_loss": 1.9167370796203613,
40144
- "eval_runtime": 11.3429,
40145
- "eval_samples_per_second": 50.692,
40146
- "eval_steps_per_second": 12.695,
40147
  "num_input_tokens_seen": 33245216,
40148
  "step": 24400
40149
  },
@@ -40470,9 +40470,9 @@
40470
  {
40471
  "epoch": 75.9273570324575,
40472
  "eval_loss": 1.1968048810958862,
40473
- "eval_runtime": 11.3324,
40474
- "eval_samples_per_second": 50.74,
40475
- "eval_steps_per_second": 12.707,
40476
  "num_input_tokens_seen": 33517088,
40477
  "step": 24600
40478
  },
@@ -40799,9 +40799,9 @@
40799
  {
40800
  "epoch": 76.54404945904173,
40801
  "eval_loss": 1.3781951665878296,
40802
- "eval_runtime": 11.3729,
40803
- "eval_samples_per_second": 50.559,
40804
- "eval_steps_per_second": 12.662,
40805
  "num_input_tokens_seen": 33788432,
40806
  "step": 24800
40807
  },
@@ -41128,9 +41128,9 @@
41128
  {
41129
  "epoch": 77.16074188562597,
41130
  "eval_loss": 1.5010449886322021,
41131
- "eval_runtime": 11.3567,
41132
- "eval_samples_per_second": 50.631,
41133
- "eval_steps_per_second": 12.68,
41134
  "num_input_tokens_seen": 34060416,
41135
  "step": 25000
41136
  },
@@ -41457,9 +41457,9 @@
41457
  {
41458
  "epoch": 77.7789799072643,
41459
  "eval_loss": 1.5149627923965454,
41460
- "eval_runtime": 11.3386,
41461
- "eval_samples_per_second": 50.712,
41462
- "eval_steps_per_second": 12.7,
41463
  "num_input_tokens_seen": 34333408,
41464
  "step": 25200
41465
  },
@@ -41786,9 +41786,9 @@
41786
  {
41787
  "epoch": 78.39567233384854,
41788
  "eval_loss": 1.6160272359848022,
41789
- "eval_runtime": 11.3278,
41790
- "eval_samples_per_second": 50.76,
41791
- "eval_steps_per_second": 12.712,
41792
  "num_input_tokens_seen": 34605392,
41793
  "step": 25400
41794
  },
@@ -42115,9 +42115,9 @@
42115
  {
42116
  "epoch": 79.01236476043276,
42117
  "eval_loss": 1.5819573402404785,
42118
- "eval_runtime": 11.3387,
42119
- "eval_samples_per_second": 50.711,
42120
- "eval_steps_per_second": 12.7,
42121
  "num_input_tokens_seen": 34879536,
42122
  "step": 25600
42123
  },
@@ -42444,9 +42444,9 @@
42444
  {
42445
  "epoch": 79.6306027820711,
42446
  "eval_loss": 1.651304841041565,
42447
- "eval_runtime": 11.3703,
42448
- "eval_samples_per_second": 50.57,
42449
- "eval_steps_per_second": 12.665,
42450
  "num_input_tokens_seen": 35153488,
42451
  "step": 25800
42452
  },
@@ -42773,9 +42773,9 @@
42773
  {
42774
  "epoch": 80.24729520865533,
42775
  "eval_loss": 1.6964157819747925,
42776
- "eval_runtime": 11.3434,
42777
- "eval_samples_per_second": 50.69,
42778
- "eval_steps_per_second": 12.695,
42779
  "num_input_tokens_seen": 35424912,
42780
  "step": 26000
42781
  },
@@ -43102,9 +43102,9 @@
43102
  {
43103
  "epoch": 80.86553323029366,
43104
  "eval_loss": 1.7483088970184326,
43105
- "eval_runtime": 11.341,
43106
- "eval_samples_per_second": 50.701,
43107
- "eval_steps_per_second": 12.697,
43108
  "num_input_tokens_seen": 35698064,
43109
  "step": 26200
43110
  },
@@ -43431,9 +43431,9 @@
43431
  {
43432
  "epoch": 81.4822256568779,
43433
  "eval_loss": 1.7370902299880981,
43434
- "eval_runtime": 11.3472,
43435
- "eval_samples_per_second": 50.673,
43436
- "eval_steps_per_second": 12.69,
43437
  "num_input_tokens_seen": 35968160,
43438
  "step": 26400
43439
  },
@@ -43760,9 +43760,9 @@
43760
  {
43761
  "epoch": 82.09891808346214,
43762
  "eval_loss": 1.7790963649749756,
43763
- "eval_runtime": 11.3412,
43764
- "eval_samples_per_second": 50.7,
43765
- "eval_steps_per_second": 12.697,
43766
  "num_input_tokens_seen": 36240928,
43767
  "step": 26600
43768
  },
@@ -44089,9 +44089,9 @@
44089
  {
44090
  "epoch": 82.71715610510046,
44091
  "eval_loss": 1.772797703742981,
44092
- "eval_runtime": 11.3788,
44093
- "eval_samples_per_second": 50.533,
44094
- "eval_steps_per_second": 12.655,
44095
  "num_input_tokens_seen": 36514208,
44096
  "step": 26800
44097
  },
@@ -44418,9 +44418,9 @@
44418
  {
44419
  "epoch": 83.3338485316847,
44420
  "eval_loss": 1.7722996473312378,
44421
- "eval_runtime": 11.3552,
44422
- "eval_samples_per_second": 50.638,
44423
- "eval_steps_per_second": 12.681,
44424
  "num_input_tokens_seen": 36785136,
44425
  "step": 27000
44426
  },
@@ -44747,9 +44747,9 @@
44747
  {
44748
  "epoch": 83.95208655332303,
44749
  "eval_loss": 1.8002300262451172,
44750
- "eval_runtime": 11.3357,
44751
- "eval_samples_per_second": 50.725,
44752
- "eval_steps_per_second": 12.703,
44753
  "num_input_tokens_seen": 37061648,
44754
  "step": 27200
44755
  },
@@ -45076,9 +45076,9 @@
45076
  {
45077
  "epoch": 84.56877897990726,
45078
  "eval_loss": 1.804320216178894,
45079
- "eval_runtime": 11.3403,
45080
- "eval_samples_per_second": 50.704,
45081
- "eval_steps_per_second": 12.698,
45082
  "num_input_tokens_seen": 37333648,
45083
  "step": 27400
45084
  },
@@ -45405,9 +45405,9 @@
45405
  {
45406
  "epoch": 85.1854714064915,
45407
  "eval_loss": 1.8354555368423462,
45408
- "eval_runtime": 11.3408,
45409
- "eval_samples_per_second": 50.702,
45410
- "eval_steps_per_second": 12.698,
45411
  "num_input_tokens_seen": 37605184,
45412
  "step": 27600
45413
  },
@@ -45734,9 +45734,9 @@
45734
  {
45735
  "epoch": 85.80370942812984,
45736
  "eval_loss": 1.8400607109069824,
45737
- "eval_runtime": 11.3348,
45738
- "eval_samples_per_second": 50.729,
45739
- "eval_steps_per_second": 12.704,
45740
  "num_input_tokens_seen": 37875360,
45741
  "step": 27800
45742
  },
@@ -46063,9 +46063,9 @@
46063
  {
46064
  "epoch": 86.42040185471407,
46065
  "eval_loss": 1.8688201904296875,
46066
- "eval_runtime": 11.3516,
46067
- "eval_samples_per_second": 50.653,
46068
- "eval_steps_per_second": 12.685,
46069
  "num_input_tokens_seen": 38150208,
46070
  "step": 28000
46071
  },
@@ -46392,9 +46392,9 @@
46392
  {
46393
  "epoch": 87.0370942812983,
46394
  "eval_loss": 1.810387134552002,
46395
- "eval_runtime": 11.348,
46396
- "eval_samples_per_second": 50.67,
46397
- "eval_steps_per_second": 12.689,
46398
  "num_input_tokens_seen": 38422048,
46399
  "step": 28200
46400
  },
@@ -46721,9 +46721,9 @@
46721
  {
46722
  "epoch": 87.65533230293663,
46723
  "eval_loss": 1.8730015754699707,
46724
- "eval_runtime": 11.3451,
46725
- "eval_samples_per_second": 50.683,
46726
- "eval_steps_per_second": 12.693,
46727
  "num_input_tokens_seen": 38692224,
46728
  "step": 28400
46729
  },
@@ -47050,9 +47050,9 @@
47050
  {
47051
  "epoch": 88.27202472952087,
47052
  "eval_loss": 1.8786824941635132,
47053
- "eval_runtime": 11.364,
47054
- "eval_samples_per_second": 50.598,
47055
- "eval_steps_per_second": 12.672,
47056
  "num_input_tokens_seen": 38964176,
47057
  "step": 28600
47058
  },
@@ -47379,9 +47379,9 @@
47379
  {
47380
  "epoch": 88.8902627511592,
47381
  "eval_loss": 1.8849008083343506,
47382
- "eval_runtime": 11.3534,
47383
- "eval_samples_per_second": 50.646,
47384
- "eval_steps_per_second": 12.683,
47385
  "num_input_tokens_seen": 39235184,
47386
  "step": 28800
47387
  },
@@ -47708,9 +47708,9 @@
47708
  {
47709
  "epoch": 89.50695517774344,
47710
  "eval_loss": 1.9232840538024902,
47711
- "eval_runtime": 11.357,
47712
- "eval_samples_per_second": 50.63,
47713
- "eval_steps_per_second": 12.679,
47714
  "num_input_tokens_seen": 39507520,
47715
  "step": 29000
47716
  },
@@ -48037,9 +48037,9 @@
48037
  {
48038
  "epoch": 90.12364760432767,
48039
  "eval_loss": 1.9127227067947388,
48040
- "eval_runtime": 11.3392,
48041
- "eval_samples_per_second": 50.709,
48042
- "eval_steps_per_second": 12.699,
48043
  "num_input_tokens_seen": 39779328,
48044
  "step": 29200
48045
  },
@@ -48366,9 +48366,9 @@
48366
  {
48367
  "epoch": 90.74188562596599,
48368
  "eval_loss": 1.8981382846832275,
48369
- "eval_runtime": 11.3584,
48370
- "eval_samples_per_second": 50.623,
48371
- "eval_steps_per_second": 12.678,
48372
  "num_input_tokens_seen": 40051520,
48373
  "step": 29400
48374
  },
@@ -48695,9 +48695,9 @@
48695
  {
48696
  "epoch": 91.35857805255023,
48697
  "eval_loss": 1.9302953481674194,
48698
- "eval_runtime": 11.3578,
48699
- "eval_samples_per_second": 50.626,
48700
- "eval_steps_per_second": 12.679,
48701
  "num_input_tokens_seen": 40322576,
48702
  "step": 29600
48703
  },
@@ -49024,9 +49024,9 @@
49024
  {
49025
  "epoch": 91.97681607418856,
49026
  "eval_loss": 1.9179975986480713,
49027
- "eval_runtime": 11.352,
49028
- "eval_samples_per_second": 50.652,
49029
- "eval_steps_per_second": 12.685,
49030
  "num_input_tokens_seen": 40596016,
49031
  "step": 29800
49032
  },
@@ -49353,9 +49353,9 @@
49353
  {
49354
  "epoch": 92.5935085007728,
49355
  "eval_loss": 1.92044997215271,
49356
- "eval_runtime": 11.3431,
49357
- "eval_samples_per_second": 50.692,
49358
- "eval_steps_per_second": 12.695,
49359
  "num_input_tokens_seen": 40867568,
49360
  "step": 30000
49361
  },
@@ -49682,9 +49682,9 @@
49682
  {
49683
  "epoch": 93.21020092735704,
49684
  "eval_loss": 1.9711647033691406,
49685
- "eval_runtime": 11.3695,
49686
- "eval_samples_per_second": 50.574,
49687
- "eval_steps_per_second": 12.665,
49688
  "num_input_tokens_seen": 41140848,
49689
  "step": 30200
49690
  },
@@ -50011,9 +50011,9 @@
50011
  {
50012
  "epoch": 93.82843894899537,
50013
  "eval_loss": 1.976061463356018,
50014
- "eval_runtime": 11.38,
50015
- "eval_samples_per_second": 50.527,
50016
- "eval_steps_per_second": 12.654,
50017
  "num_input_tokens_seen": 41412848,
50018
  "step": 30400
50019
  },
@@ -50340,9 +50340,9 @@
50340
  {
50341
  "epoch": 94.44513137557959,
50342
  "eval_loss": 1.958508849143982,
50343
- "eval_runtime": 11.3371,
50344
- "eval_samples_per_second": 50.719,
50345
- "eval_steps_per_second": 12.702,
50346
  "num_input_tokens_seen": 41683920,
50347
  "step": 30600
50348
  },
@@ -50669,9 +50669,9 @@
50669
  {
50670
  "epoch": 95.06182380216383,
50671
  "eval_loss": 1.9966574907302856,
50672
- "eval_runtime": 11.3424,
50673
- "eval_samples_per_second": 50.695,
50674
- "eval_steps_per_second": 12.696,
50675
  "num_input_tokens_seen": 41959008,
50676
  "step": 30800
50677
  },
@@ -50998,9 +50998,9 @@
50998
  {
50999
  "epoch": 95.68006182380216,
51000
  "eval_loss": 1.9950237274169922,
51001
- "eval_runtime": 11.3443,
51002
- "eval_samples_per_second": 50.686,
51003
- "eval_steps_per_second": 12.694,
51004
  "num_input_tokens_seen": 42231520,
51005
  "step": 31000
51006
  },
@@ -51327,9 +51327,9 @@
51327
  {
51328
  "epoch": 96.2967542503864,
51329
  "eval_loss": 1.9839365482330322,
51330
- "eval_runtime": 11.3428,
51331
- "eval_samples_per_second": 50.693,
51332
- "eval_steps_per_second": 12.695,
51333
  "num_input_tokens_seen": 42502416,
51334
  "step": 31200
51335
  },
@@ -51656,9 +51656,9 @@
51656
  {
51657
  "epoch": 96.91499227202473,
51658
  "eval_loss": 2.004136323928833,
51659
- "eval_runtime": 11.3314,
51660
- "eval_samples_per_second": 50.744,
51661
- "eval_steps_per_second": 12.708,
51662
  "num_input_tokens_seen": 42776304,
51663
  "step": 31400
51664
  },
@@ -51985,9 +51985,9 @@
51985
  {
51986
  "epoch": 97.53168469860897,
51987
  "eval_loss": 2.016206979751587,
51988
- "eval_runtime": 11.3358,
51989
- "eval_samples_per_second": 50.724,
51990
- "eval_steps_per_second": 12.703,
51991
  "num_input_tokens_seen": 43048176,
51992
  "step": 31600
51993
  },
@@ -52314,9 +52314,9 @@
52314
  {
52315
  "epoch": 98.14837712519319,
52316
  "eval_loss": 2.010310173034668,
52317
- "eval_runtime": 11.3477,
52318
- "eval_samples_per_second": 50.671,
52319
- "eval_steps_per_second": 12.69,
52320
  "num_input_tokens_seen": 43320144,
52321
  "step": 31800
52322
  },
@@ -52643,9 +52643,9 @@
52643
  {
52644
  "epoch": 98.76661514683153,
52645
  "eval_loss": 2.008124589920044,
52646
- "eval_runtime": 11.3399,
52647
- "eval_samples_per_second": 50.706,
52648
- "eval_steps_per_second": 12.699,
52649
  "num_input_tokens_seen": 43591728,
52650
  "step": 32000
52651
  },
@@ -52972,9 +52972,9 @@
52972
  {
52973
  "epoch": 99.38330757341576,
52974
  "eval_loss": 2.027338981628418,
52975
- "eval_runtime": 11.3475,
52976
- "eval_samples_per_second": 50.672,
52977
- "eval_steps_per_second": 12.69,
52978
  "num_input_tokens_seen": 43866048,
52979
  "step": 32200
52980
  },
@@ -53301,9 +53301,9 @@
53301
  {
53302
  "epoch": 100.0,
53303
  "eval_loss": 2.034688711166382,
53304
- "eval_runtime": 11.3308,
53305
- "eval_samples_per_second": 50.747,
53306
- "eval_steps_per_second": 12.709,
53307
  "num_input_tokens_seen": 44137040,
53308
  "step": 32400
53309
  },
@@ -53630,9 +53630,9 @@
53630
  {
53631
  "epoch": 100.61823802163833,
53632
  "eval_loss": 2.052443265914917,
53633
- "eval_runtime": 11.3302,
53634
- "eval_samples_per_second": 50.749,
53635
- "eval_steps_per_second": 12.709,
53636
  "num_input_tokens_seen": 44408848,
53637
  "step": 32600
53638
  },
@@ -53959,9 +53959,9 @@
53959
  {
53960
  "epoch": 101.23493044822257,
53961
  "eval_loss": 2.067155599594116,
53962
- "eval_runtime": 11.3619,
53963
- "eval_samples_per_second": 50.608,
53964
- "eval_steps_per_second": 12.674,
53965
  "num_input_tokens_seen": 44682912,
53966
  "step": 32800
53967
  },
@@ -54288,9 +54288,9 @@
54288
  {
54289
  "epoch": 101.85316846986089,
54290
  "eval_loss": 2.042910575866699,
54291
- "eval_runtime": 11.3611,
54292
- "eval_samples_per_second": 50.611,
54293
- "eval_steps_per_second": 12.675,
54294
  "num_input_tokens_seen": 44956000,
54295
  "step": 33000
54296
  },
@@ -54617,9 +54617,9 @@
54617
  {
54618
  "epoch": 102.46986089644513,
54619
  "eval_loss": 2.049968719482422,
54620
- "eval_runtime": 11.3446,
54621
- "eval_samples_per_second": 50.685,
54622
- "eval_steps_per_second": 12.693,
54623
  "num_input_tokens_seen": 45227824,
54624
  "step": 33200
54625
  },
@@ -54946,9 +54946,9 @@
54946
  {
54947
  "epoch": 103.08655332302936,
54948
  "eval_loss": 2.047577142715454,
54949
- "eval_runtime": 11.3408,
54950
- "eval_samples_per_second": 50.702,
54951
- "eval_steps_per_second": 12.698,
54952
  "num_input_tokens_seen": 45498320,
54953
  "step": 33400
54954
  },
@@ -55275,9 +55275,9 @@
55275
  {
55276
  "epoch": 103.7047913446677,
55277
  "eval_loss": 2.0636227130889893,
55278
- "eval_runtime": 11.3327,
55279
- "eval_samples_per_second": 50.738,
55280
- "eval_steps_per_second": 12.707,
55281
  "num_input_tokens_seen": 45773648,
55282
  "step": 33600
55283
  },
@@ -55604,9 +55604,9 @@
55604
  {
55605
  "epoch": 104.32148377125193,
55606
  "eval_loss": 2.080799102783203,
55607
- "eval_runtime": 11.3402,
55608
- "eval_samples_per_second": 50.705,
55609
- "eval_steps_per_second": 12.698,
55610
  "num_input_tokens_seen": 46044128,
55611
  "step": 33800
55612
  },
@@ -55933,9 +55933,9 @@
55933
  {
55934
  "epoch": 104.93972179289027,
55935
  "eval_loss": 2.0720911026000977,
55936
- "eval_runtime": 11.3379,
55937
- "eval_samples_per_second": 50.715,
55938
- "eval_steps_per_second": 12.701,
55939
  "num_input_tokens_seen": 46317504,
55940
  "step": 34000
55941
  },
@@ -56262,9 +56262,9 @@
56262
  {
56263
  "epoch": 105.5564142194745,
56264
  "eval_loss": 2.082965612411499,
56265
- "eval_runtime": 11.3573,
56266
- "eval_samples_per_second": 50.628,
56267
- "eval_steps_per_second": 12.679,
56268
  "num_input_tokens_seen": 46589024,
56269
  "step": 34200
56270
  },
@@ -56591,9 +56591,9 @@
56591
  {
56592
  "epoch": 106.17310664605873,
56593
  "eval_loss": 2.094463348388672,
56594
- "eval_runtime": 11.3404,
56595
- "eval_samples_per_second": 50.704,
56596
- "eval_steps_per_second": 12.698,
56597
  "num_input_tokens_seen": 46863680,
56598
  "step": 34400
56599
  },
@@ -56920,9 +56920,9 @@
56920
  {
56921
  "epoch": 106.79134466769706,
56922
  "eval_loss": 2.0966665744781494,
56923
- "eval_runtime": 11.3482,
56924
- "eval_samples_per_second": 50.669,
56925
- "eval_steps_per_second": 12.689,
56926
  "num_input_tokens_seen": 47135520,
56927
  "step": 34600
56928
  },
@@ -57249,9 +57249,9 @@
57249
  {
57250
  "epoch": 107.4080370942813,
57251
  "eval_loss": 2.1042280197143555,
57252
- "eval_runtime": 11.341,
57253
- "eval_samples_per_second": 50.701,
57254
- "eval_steps_per_second": 12.697,
57255
  "num_input_tokens_seen": 47407056,
57256
  "step": 34800
57257
  },
@@ -57578,9 +57578,9 @@
57578
  {
57579
  "epoch": 108.02472952086553,
57580
  "eval_loss": 2.096859931945801,
57581
- "eval_runtime": 11.293,
57582
- "eval_samples_per_second": 50.916,
57583
- "eval_steps_per_second": 12.751,
57584
  "num_input_tokens_seen": 47680112,
57585
  "step": 35000
57586
  },
@@ -57907,9 +57907,9 @@
57907
  {
57908
  "epoch": 108.64296754250387,
57909
  "eval_loss": 2.1074352264404297,
57910
- "eval_runtime": 11.29,
57911
- "eval_samples_per_second": 50.93,
57912
- "eval_steps_per_second": 12.755,
57913
  "num_input_tokens_seen": 47951632,
57914
  "step": 35200
57915
  },
@@ -58236,9 +58236,9 @@
58236
  {
58237
  "epoch": 109.2596599690881,
58238
  "eval_loss": 2.110261917114258,
58239
- "eval_runtime": 11.3046,
58240
- "eval_samples_per_second": 50.864,
58241
- "eval_steps_per_second": 12.738,
58242
  "num_input_tokens_seen": 48224016,
58243
  "step": 35400
58244
  },
@@ -58565,9 +58565,9 @@
58565
  {
58566
  "epoch": 109.87789799072642,
58567
  "eval_loss": 2.1071767807006836,
58568
- "eval_runtime": 11.2808,
58569
- "eval_samples_per_second": 50.971,
58570
- "eval_steps_per_second": 12.765,
58571
  "num_input_tokens_seen": 48497072,
58572
  "step": 35600
58573
  },
@@ -58894,9 +58894,9 @@
58894
  {
58895
  "epoch": 110.49459041731066,
58896
  "eval_loss": 2.1081290245056152,
58897
- "eval_runtime": 11.3085,
58898
- "eval_samples_per_second": 50.847,
58899
- "eval_steps_per_second": 12.734,
58900
  "num_input_tokens_seen": 48768624,
58901
  "step": 35800
58902
  },
@@ -59223,9 +59223,9 @@
59223
  {
59224
  "epoch": 111.1112828438949,
59225
  "eval_loss": 2.1115777492523193,
59226
- "eval_runtime": 11.2907,
59227
- "eval_samples_per_second": 50.927,
59228
- "eval_steps_per_second": 12.754,
59229
  "num_input_tokens_seen": 49041488,
59230
  "step": 36000
59231
  },
@@ -59552,9 +59552,9 @@
59552
  {
59553
  "epoch": 111.72952086553323,
59554
  "eval_loss": 2.1243085861206055,
59555
- "eval_runtime": 11.29,
59556
- "eval_samples_per_second": 50.93,
59557
- "eval_steps_per_second": 12.755,
59558
  "num_input_tokens_seen": 49314352,
59559
  "step": 36200
59560
  },
@@ -59881,9 +59881,9 @@
59881
  {
59882
  "epoch": 112.34621329211747,
59883
  "eval_loss": 2.1214993000030518,
59884
- "eval_runtime": 11.2904,
59885
- "eval_samples_per_second": 50.928,
59886
- "eval_steps_per_second": 12.754,
59887
  "num_input_tokens_seen": 49584848,
59888
  "step": 36400
59889
  },
@@ -60210,9 +60210,9 @@
60210
  {
60211
  "epoch": 112.9644513137558,
60212
  "eval_loss": 2.1198999881744385,
60213
- "eval_runtime": 11.2911,
60214
- "eval_samples_per_second": 50.925,
60215
- "eval_steps_per_second": 12.753,
60216
  "num_input_tokens_seen": 49858864,
60217
  "step": 36600
60218
  },
@@ -60539,9 +60539,9 @@
60539
  {
60540
  "epoch": 113.58114374034002,
60541
  "eval_loss": 2.129167079925537,
60542
- "eval_runtime": 11.2949,
60543
- "eval_samples_per_second": 50.908,
60544
- "eval_steps_per_second": 12.749,
60545
  "num_input_tokens_seen": 50130000,
60546
  "step": 36800
60547
  },
@@ -60868,9 +60868,9 @@
60868
  {
60869
  "epoch": 114.19783616692426,
60870
  "eval_loss": 2.127554178237915,
60871
- "eval_runtime": 11.3006,
60872
- "eval_samples_per_second": 50.882,
60873
- "eval_steps_per_second": 12.743,
60874
  "num_input_tokens_seen": 50404128,
60875
  "step": 37000
60876
  },
@@ -61197,9 +61197,9 @@
61197
  {
61198
  "epoch": 114.8160741885626,
61199
  "eval_loss": 2.1345906257629395,
61200
- "eval_runtime": 11.2856,
61201
- "eval_samples_per_second": 50.95,
61202
- "eval_steps_per_second": 12.76,
61203
  "num_input_tokens_seen": 50678112,
61204
  "step": 37200
61205
  },
@@ -61526,9 +61526,9 @@
61526
  {
61527
  "epoch": 115.43276661514683,
61528
  "eval_loss": 2.132272481918335,
61529
- "eval_runtime": 11.2762,
61530
- "eval_samples_per_second": 50.992,
61531
- "eval_steps_per_second": 12.77,
61532
  "num_input_tokens_seen": 50946800,
61533
  "step": 37400
61534
  },
@@ -61855,9 +61855,9 @@
61855
  {
61856
  "epoch": 116.04945904173107,
61857
  "eval_loss": 2.131873369216919,
61858
- "eval_runtime": 11.2834,
61859
- "eval_samples_per_second": 50.96,
61860
- "eval_steps_per_second": 12.762,
61861
  "num_input_tokens_seen": 51219680,
61862
  "step": 37600
61863
  },
@@ -62184,9 +62184,9 @@
62184
  {
62185
  "epoch": 116.6676970633694,
62186
  "eval_loss": 2.1323955059051514,
62187
- "eval_runtime": 11.2909,
62188
- "eval_samples_per_second": 50.926,
62189
- "eval_steps_per_second": 12.754,
62190
  "num_input_tokens_seen": 51492544,
62191
  "step": 37800
62192
  },
@@ -62513,9 +62513,9 @@
62513
  {
62514
  "epoch": 117.28438948995363,
62515
  "eval_loss": 2.135064125061035,
62516
- "eval_runtime": 11.2652,
62517
- "eval_samples_per_second": 51.042,
62518
- "eval_steps_per_second": 12.783,
62519
  "num_input_tokens_seen": 51764160,
62520
  "step": 38000
62521
  },
@@ -62842,9 +62842,9 @@
62842
  {
62843
  "epoch": 117.90262751159196,
62844
  "eval_loss": 2.134946584701538,
62845
- "eval_runtime": 11.2618,
62846
- "eval_samples_per_second": 51.058,
62847
- "eval_steps_per_second": 12.787,
62848
  "num_input_tokens_seen": 52039488,
62849
  "step": 38200
62850
  },
@@ -63171,9 +63171,9 @@
63171
  {
63172
  "epoch": 118.5193199381762,
63173
  "eval_loss": 2.1382298469543457,
63174
- "eval_runtime": 11.2697,
63175
- "eval_samples_per_second": 51.022,
63176
- "eval_steps_per_second": 12.778,
63177
  "num_input_tokens_seen": 52311648,
63178
  "step": 38400
63179
  },
@@ -63500,9 +63500,9 @@
63500
  {
63501
  "epoch": 119.13601236476043,
63502
  "eval_loss": 2.1389503479003906,
63503
- "eval_runtime": 11.2591,
63504
- "eval_samples_per_second": 51.07,
63505
- "eval_steps_per_second": 12.79,
63506
  "num_input_tokens_seen": 52584960,
63507
  "step": 38600
63508
  },
@@ -63829,9 +63829,9 @@
63829
  {
63830
  "epoch": 119.75425038639877,
63831
  "eval_loss": 2.141028642654419,
63832
- "eval_runtime": 11.2592,
63833
- "eval_samples_per_second": 51.069,
63834
- "eval_steps_per_second": 12.79,
63835
  "num_input_tokens_seen": 52855712,
63836
  "step": 38800
63837
  },
@@ -64158,9 +64158,9 @@
64158
  {
64159
  "epoch": 120.370942812983,
64160
  "eval_loss": 2.142845630645752,
64161
- "eval_runtime": 11.261,
64162
- "eval_samples_per_second": 51.061,
64163
- "eval_steps_per_second": 12.787,
64164
  "num_input_tokens_seen": 53128480,
64165
  "step": 39000
64166
  },
@@ -64487,9 +64487,9 @@
64487
  {
64488
  "epoch": 120.98918083462132,
64489
  "eval_loss": 2.142850637435913,
64490
- "eval_runtime": 11.2575,
64491
- "eval_samples_per_second": 51.077,
64492
- "eval_steps_per_second": 12.791,
64493
  "num_input_tokens_seen": 53401056,
64494
  "step": 39200
64495
  },
@@ -64816,9 +64816,9 @@
64816
  {
64817
  "epoch": 121.60587326120556,
64818
  "eval_loss": 2.141234874725342,
64819
- "eval_runtime": 11.2631,
64820
- "eval_samples_per_second": 51.051,
64821
- "eval_steps_per_second": 12.785,
64822
  "num_input_tokens_seen": 53673600,
64823
  "step": 39400
64824
  },
@@ -65145,9 +65145,9 @@
65145
  {
65146
  "epoch": 122.2225656877898,
65147
  "eval_loss": 2.1376492977142334,
65148
- "eval_runtime": 11.2658,
65149
- "eval_samples_per_second": 51.04,
65150
- "eval_steps_per_second": 12.782,
65151
  "num_input_tokens_seen": 53943712,
65152
  "step": 39600
65153
  },
@@ -65474,9 +65474,9 @@
65474
  {
65475
  "epoch": 122.84080370942813,
65476
  "eval_loss": 2.1381330490112305,
65477
- "eval_runtime": 11.2652,
65478
- "eval_samples_per_second": 51.042,
65479
- "eval_steps_per_second": 12.783,
65480
  "num_input_tokens_seen": 54217344,
65481
  "step": 39800
65482
  },
@@ -65803,9 +65803,9 @@
65803
  {
65804
  "epoch": 123.45749613601237,
65805
  "eval_loss": 2.137033700942993,
65806
- "eval_runtime": 11.3401,
65807
- "eval_samples_per_second": 50.705,
65808
- "eval_steps_per_second": 12.698,
65809
  "num_input_tokens_seen": 54490336,
65810
  "step": 40000
65811
  },
@@ -65815,9 +65815,9 @@
65815
  "step": 40000,
65816
  "total_flos": 2.453675202191819e+18,
65817
  "train_loss": 0.10362623064493919,
65818
- "train_runtime": 29327.4396,
65819
- "train_samples_per_second": 21.823,
65820
- "train_steps_per_second": 1.364
65821
  }
65822
  ],
65823
  "logging_steps": 5,
 
332
  {
333
  "epoch": 0.6182380216383307,
334
  "eval_loss": 0.9577658176422119,
335
+ "eval_runtime": 11.2706,
336
+ "eval_samples_per_second": 51.018,
337
+ "eval_steps_per_second": 12.777,
338
  "num_input_tokens_seen": 272576,
339
  "step": 200
340
  },
 
661
  {
662
  "epoch": 1.2349304482225656,
663
  "eval_loss": 0.7184381484985352,
664
+ "eval_runtime": 11.278,
665
+ "eval_samples_per_second": 50.984,
666
+ "eval_steps_per_second": 12.768,
667
  "num_input_tokens_seen": 544096,
668
  "step": 400
669
  },
 
990
  {
991
  "epoch": 1.8531684698608966,
992
  "eval_loss": 0.6815493106842041,
993
+ "eval_runtime": 11.2856,
994
+ "eval_samples_per_second": 50.95,
995
+ "eval_steps_per_second": 12.76,
996
  "num_input_tokens_seen": 818048,
997
  "step": 600
998
  },
 
1319
  {
1320
  "epoch": 2.469860896445131,
1321
  "eval_loss": 0.6753404140472412,
1322
+ "eval_runtime": 11.2983,
1323
+ "eval_samples_per_second": 50.893,
1324
+ "eval_steps_per_second": 12.745,
1325
  "num_input_tokens_seen": 1089600,
1326
  "step": 800
1327
  },
 
1648
  {
1649
  "epoch": 3.0865533230293662,
1650
  "eval_loss": 0.6587666273117065,
1651
+ "eval_runtime": 11.289,
1652
+ "eval_samples_per_second": 50.935,
1653
+ "eval_steps_per_second": 12.756,
1654
  "num_input_tokens_seen": 1361504,
1655
  "step": 1000
1656
  },
 
1977
  {
1978
  "epoch": 3.704791344667697,
1979
  "eval_loss": 0.6507958173751831,
1980
+ "eval_runtime": 11.2821,
1981
+ "eval_samples_per_second": 50.966,
1982
+ "eval_steps_per_second": 12.764,
1983
  "num_input_tokens_seen": 1636960,
1984
  "step": 1200
1985
  },
 
2306
  {
2307
  "epoch": 4.321483771251932,
2308
  "eval_loss": 0.6580312252044678,
2309
+ "eval_runtime": 11.2952,
2310
+ "eval_samples_per_second": 50.907,
2311
+ "eval_steps_per_second": 12.749,
2312
  "num_input_tokens_seen": 1909696,
2313
  "step": 1400
2314
  },
 
2635
  {
2636
  "epoch": 4.939721792890262,
2637
  "eval_loss": 0.6381492614746094,
2638
+ "eval_runtime": 11.2842,
2639
+ "eval_samples_per_second": 50.956,
2640
+ "eval_steps_per_second": 12.761,
2641
  "num_input_tokens_seen": 2182656,
2642
  "step": 1600
2643
  },
 
2964
  {
2965
  "epoch": 5.556414219474497,
2966
  "eval_loss": 0.6330167055130005,
2967
+ "eval_runtime": 11.288,
2968
+ "eval_samples_per_second": 50.939,
2969
+ "eval_steps_per_second": 12.757,
2970
  "num_input_tokens_seen": 2453904,
2971
  "step": 1800
2972
  },
 
3293
  {
3294
  "epoch": 6.1731066460587325,
3295
  "eval_loss": 0.6232376098632812,
3296
+ "eval_runtime": 11.318,
3297
+ "eval_samples_per_second": 50.804,
3298
+ "eval_steps_per_second": 12.723,
3299
  "num_input_tokens_seen": 2727984,
3300
  "step": 2000
3301
  },
 
3622
  {
3623
  "epoch": 6.7913446676970635,
3624
  "eval_loss": 0.6167892813682556,
3625
+ "eval_runtime": 11.2955,
3626
+ "eval_samples_per_second": 50.905,
3627
+ "eval_steps_per_second": 12.748,
3628
  "num_input_tokens_seen": 2999760,
3629
  "step": 2200
3630
  },
 
3951
  {
3952
  "epoch": 7.4080370942812985,
3953
  "eval_loss": 0.5621501207351685,
3954
+ "eval_runtime": 11.2909,
3955
+ "eval_samples_per_second": 50.926,
3956
+ "eval_steps_per_second": 12.754,
3957
  "num_input_tokens_seen": 3274528,
3958
  "step": 2400
3959
  },
 
4280
  {
4281
  "epoch": 8.024729520865533,
4282
  "eval_loss": 0.5813793540000916,
4283
+ "eval_runtime": 11.2998,
4284
+ "eval_samples_per_second": 50.886,
4285
+ "eval_steps_per_second": 12.744,
4286
  "num_input_tokens_seen": 3546880,
4287
  "step": 2600
4288
  },
 
4609
  {
4610
  "epoch": 8.642967542503865,
4611
  "eval_loss": 0.5915025472640991,
4612
+ "eval_runtime": 11.2948,
4613
+ "eval_samples_per_second": 50.909,
4614
+ "eval_steps_per_second": 12.749,
4615
  "num_input_tokens_seen": 3821184,
4616
  "step": 2800
4617
  },
 
4938
  {
4939
  "epoch": 9.2596599690881,
4940
  "eval_loss": 0.5584082007408142,
4941
+ "eval_runtime": 11.2865,
4942
+ "eval_samples_per_second": 50.946,
4943
+ "eval_steps_per_second": 12.759,
4944
  "num_input_tokens_seen": 4090704,
4945
  "step": 3000
4946
  },
 
5267
  {
5268
  "epoch": 9.87789799072643,
5269
  "eval_loss": 0.562062680721283,
5270
+ "eval_runtime": 11.2889,
5271
+ "eval_samples_per_second": 50.935,
5272
+ "eval_steps_per_second": 12.756,
5273
  "num_input_tokens_seen": 4363696,
5274
  "step": 3200
5275
  },
 
5596
  {
5597
  "epoch": 10.494590417310665,
5598
  "eval_loss": 0.5493518114089966,
5599
+ "eval_runtime": 11.2948,
5600
+ "eval_samples_per_second": 50.909,
5601
+ "eval_steps_per_second": 12.749,
5602
  "num_input_tokens_seen": 4636656,
5603
  "step": 3400
5604
  },
 
5925
  {
5926
  "epoch": 11.1112828438949,
5927
  "eval_loss": 0.5832644701004028,
5928
+ "eval_runtime": 11.2861,
5929
+ "eval_samples_per_second": 50.948,
5930
+ "eval_steps_per_second": 12.759,
5931
  "num_input_tokens_seen": 4908928,
5932
  "step": 3600
5933
  },
 
6254
  {
6255
  "epoch": 11.72952086553323,
6256
  "eval_loss": 0.5668447017669678,
6257
+ "eval_runtime": 11.3543,
6258
+ "eval_samples_per_second": 50.641,
6259
+ "eval_steps_per_second": 12.682,
6260
  "num_input_tokens_seen": 5179040,
6261
  "step": 3800
6262
  },
 
6583
  {
6584
  "epoch": 12.346213292117465,
6585
  "eval_loss": 0.5749086737632751,
6586
+ "eval_runtime": 11.3124,
6587
+ "eval_samples_per_second": 50.829,
6588
+ "eval_steps_per_second": 12.729,
6589
  "num_input_tokens_seen": 5452192,
6590
  "step": 4000
6591
  },
 
6912
  {
6913
  "epoch": 12.964451313755795,
6914
  "eval_loss": 0.564673900604248,
6915
+ "eval_runtime": 11.2981,
6916
+ "eval_samples_per_second": 50.894,
6917
+ "eval_steps_per_second": 12.746,
6918
  "num_input_tokens_seen": 5724448,
6919
  "step": 4200
6920
  },
 
7241
  {
7242
  "epoch": 13.58114374034003,
7243
  "eval_loss": 0.557171642780304,
7244
+ "eval_runtime": 11.2883,
7245
+ "eval_samples_per_second": 50.938,
7246
+ "eval_steps_per_second": 12.757,
7247
  "num_input_tokens_seen": 5998032,
7248
  "step": 4400
7249
  },
 
7570
  {
7571
  "epoch": 14.197836166924265,
7572
  "eval_loss": 0.5686624050140381,
7573
+ "eval_runtime": 11.2932,
7574
+ "eval_samples_per_second": 50.916,
7575
+ "eval_steps_per_second": 12.751,
7576
  "num_input_tokens_seen": 6269792,
7577
  "step": 4600
7578
  },
 
7899
  {
7900
  "epoch": 14.816074188562597,
7901
  "eval_loss": 0.5626024603843689,
7902
+ "eval_runtime": 11.2851,
7903
+ "eval_samples_per_second": 50.952,
7904
+ "eval_steps_per_second": 12.76,
7905
  "num_input_tokens_seen": 6541248,
7906
  "step": 4800
7907
  },
 
8228
  {
8229
  "epoch": 15.432766615146832,
8230
  "eval_loss": 0.5851988196372986,
8231
+ "eval_runtime": 11.2997,
8232
+ "eval_samples_per_second": 50.886,
8233
+ "eval_steps_per_second": 12.744,
8234
  "num_input_tokens_seen": 6815200,
8235
  "step": 5000
8236
  },
 
8557
  {
8558
  "epoch": 16.049459041731065,
8559
  "eval_loss": 0.6189093589782715,
8560
+ "eval_runtime": 11.3118,
8561
+ "eval_samples_per_second": 50.832,
8562
+ "eval_steps_per_second": 12.73,
8563
  "num_input_tokens_seen": 7086224,
8564
  "step": 5200
8565
  },
 
8886
  {
8887
  "epoch": 16.667697063369395,
8888
  "eval_loss": 0.6122633814811707,
8889
+ "eval_runtime": 11.3022,
8890
+ "eval_samples_per_second": 50.875,
8891
+ "eval_steps_per_second": 12.741,
8892
  "num_input_tokens_seen": 7360560,
8893
  "step": 5400
8894
  },
 
9215
  {
9216
  "epoch": 17.284389489953632,
9217
  "eval_loss": 0.611182689666748,
9218
+ "eval_runtime": 11.2991,
9219
+ "eval_samples_per_second": 50.889,
9220
+ "eval_steps_per_second": 12.744,
9221
  "num_input_tokens_seen": 7632240,
9222
  "step": 5600
9223
  },
 
9544
  {
9545
  "epoch": 17.902627511591962,
9546
  "eval_loss": 0.5843232274055481,
9547
+ "eval_runtime": 11.2893,
9548
+ "eval_samples_per_second": 50.933,
9549
+ "eval_steps_per_second": 12.755,
9550
  "num_input_tokens_seen": 7904432,
9551
  "step": 5800
9552
  },
 
9873
  {
9874
  "epoch": 18.5193199381762,
9875
  "eval_loss": 0.6198561191558838,
9876
+ "eval_runtime": 11.3112,
9877
+ "eval_samples_per_second": 50.835,
9878
+ "eval_steps_per_second": 12.731,
9879
  "num_input_tokens_seen": 8177168,
9880
  "step": 6000
9881
  },
 
10202
  {
10203
  "epoch": 19.136012364760433,
10204
  "eval_loss": 0.6794010996818542,
10205
+ "eval_runtime": 11.2936,
10206
+ "eval_samples_per_second": 50.914,
10207
+ "eval_steps_per_second": 12.751,
10208
  "num_input_tokens_seen": 8449968,
10209
  "step": 6200
10210
  },
 
10531
  {
10532
  "epoch": 19.754250386398763,
10533
  "eval_loss": 0.6374606490135193,
10534
+ "eval_runtime": 11.2831,
10535
+ "eval_samples_per_second": 50.961,
10536
+ "eval_steps_per_second": 12.762,
10537
  "num_input_tokens_seen": 8722992,
10538
  "step": 6400
10539
  },
 
10860
  {
10861
  "epoch": 20.370942812983,
10862
  "eval_loss": 0.6705669164657593,
10863
+ "eval_runtime": 11.2994,
10864
+ "eval_samples_per_second": 50.888,
10865
+ "eval_steps_per_second": 12.744,
10866
  "num_input_tokens_seen": 8996224,
10867
  "step": 6600
10868
  },
 
11189
  {
11190
  "epoch": 20.98918083462133,
11191
  "eval_loss": 0.648054838180542,
11192
+ "eval_runtime": 11.2898,
11193
+ "eval_samples_per_second": 50.931,
11194
+ "eval_steps_per_second": 12.755,
11195
  "num_input_tokens_seen": 9269504,
11196
  "step": 6800
11197
  },
 
11518
  {
11519
  "epoch": 21.605873261205563,
11520
  "eval_loss": 0.7299332618713379,
11521
+ "eval_runtime": 11.3095,
11522
+ "eval_samples_per_second": 50.842,
11523
+ "eval_steps_per_second": 12.733,
11524
  "num_input_tokens_seen": 9542432,
11525
  "step": 7000
11526
  },
 
11847
  {
11848
  "epoch": 22.2225656877898,
11849
  "eval_loss": 0.7840644717216492,
11850
+ "eval_runtime": 11.3076,
11851
+ "eval_samples_per_second": 50.851,
11852
+ "eval_steps_per_second": 12.735,
11853
  "num_input_tokens_seen": 9812704,
11854
  "step": 7200
11855
  },
 
12176
  {
12177
  "epoch": 22.84080370942813,
12178
  "eval_loss": 0.7381678819656372,
12179
+ "eval_runtime": 11.3001,
12180
+ "eval_samples_per_second": 50.885,
12181
+ "eval_steps_per_second": 12.743,
12182
  "num_input_tokens_seen": 10086272,
12183
  "step": 7400
12184
  },
 
12505
  {
12506
  "epoch": 23.457496136012363,
12507
  "eval_loss": 0.7728149890899658,
12508
+ "eval_runtime": 11.2886,
12509
+ "eval_samples_per_second": 50.936,
12510
+ "eval_steps_per_second": 12.756,
12511
  "num_input_tokens_seen": 10358832,
12512
  "step": 7600
12513
  },
 
12834
  {
12835
  "epoch": 24.0741885625966,
12836
  "eval_loss": 0.8268849849700928,
12837
+ "eval_runtime": 11.2998,
12838
+ "eval_samples_per_second": 50.886,
12839
+ "eval_steps_per_second": 12.744,
12840
  "num_input_tokens_seen": 10630000,
12841
  "step": 7800
12842
  },
 
13163
  {
13164
  "epoch": 24.69242658423493,
13165
  "eval_loss": 0.8175145983695984,
13166
+ "eval_runtime": 11.2902,
13167
+ "eval_samples_per_second": 50.929,
13168
+ "eval_steps_per_second": 12.754,
13169
  "num_input_tokens_seen": 10904880,
13170
  "step": 8000
13171
  },
 
13492
  {
13493
  "epoch": 25.309119010819167,
13494
  "eval_loss": 0.8719689249992371,
13495
+ "eval_runtime": 11.2821,
13496
+ "eval_samples_per_second": 50.965,
13497
+ "eval_steps_per_second": 12.764,
13498
  "num_input_tokens_seen": 11176208,
13499
  "step": 8200
13500
  },
 
13821
  {
13822
  "epoch": 25.927357032457497,
13823
  "eval_loss": 0.9041878581047058,
13824
+ "eval_runtime": 11.2871,
13825
+ "eval_samples_per_second": 50.943,
13826
+ "eval_steps_per_second": 12.758,
13827
  "num_input_tokens_seen": 11451344,
13828
  "step": 8400
13829
  },
 
14150
  {
14151
  "epoch": 26.54404945904173,
14152
  "eval_loss": 0.8620166778564453,
14153
+ "eval_runtime": 11.3068,
14154
+ "eval_samples_per_second": 50.854,
14155
+ "eval_steps_per_second": 12.736,
14156
  "num_input_tokens_seen": 11723328,
14157
  "step": 8600
14158
  },
 
14479
  {
14480
  "epoch": 27.160741885625967,
14481
  "eval_loss": 0.9756768345832825,
14482
+ "eval_runtime": 11.2984,
14483
+ "eval_samples_per_second": 50.892,
14484
+ "eval_steps_per_second": 12.745,
14485
  "num_input_tokens_seen": 11996224,
14486
  "step": 8800
14487
  },
 
14808
  {
14809
  "epoch": 27.778979907264297,
14810
  "eval_loss": 0.9385554194450378,
14811
+ "eval_runtime": 11.3054,
14812
+ "eval_samples_per_second": 50.86,
14813
+ "eval_steps_per_second": 12.737,
14814
  "num_input_tokens_seen": 12267520,
14815
  "step": 9000
14816
  },
 
15137
  {
15138
  "epoch": 28.39567233384853,
15139
  "eval_loss": 0.9237757921218872,
15140
+ "eval_runtime": 11.3005,
15141
+ "eval_samples_per_second": 50.883,
15142
+ "eval_steps_per_second": 12.743,
15143
  "num_input_tokens_seen": 12542064,
15144
  "step": 9200
15145
  },
 
15466
  {
15467
  "epoch": 29.012364760432767,
15468
  "eval_loss": 1.064571499824524,
15469
+ "eval_runtime": 11.2975,
15470
+ "eval_samples_per_second": 50.896,
15471
+ "eval_steps_per_second": 12.746,
15472
  "num_input_tokens_seen": 12812048,
15473
  "step": 9400
15474
  },
 
15795
  {
15796
  "epoch": 29.630602782071097,
15797
  "eval_loss": 1.0749653577804565,
15798
+ "eval_runtime": 11.2962,
15799
+ "eval_samples_per_second": 50.902,
15800
+ "eval_steps_per_second": 12.748,
15801
  "num_input_tokens_seen": 13085264,
15802
  "step": 9600
15803
  },
 
16124
  {
16125
  "epoch": 30.24729520865533,
16126
  "eval_loss": 1.0077648162841797,
16127
+ "eval_runtime": 11.3049,
16128
+ "eval_samples_per_second": 50.863,
16129
+ "eval_steps_per_second": 12.738,
16130
  "num_input_tokens_seen": 13356384,
16131
  "step": 9800
16132
  },
 
16453
  {
16454
  "epoch": 30.865533230293664,
16455
  "eval_loss": 1.057982325553894,
16456
+ "eval_runtime": 11.3018,
16457
+ "eval_samples_per_second": 50.877,
16458
+ "eval_steps_per_second": 12.741,
16459
  "num_input_tokens_seen": 13629216,
16460
  "step": 10000
16461
  },
 
16782
  {
16783
  "epoch": 31.482225656877898,
16784
  "eval_loss": 1.0450738668441772,
16785
+ "eval_runtime": 11.2984,
16786
+ "eval_samples_per_second": 50.892,
16787
+ "eval_steps_per_second": 12.745,
16788
  "num_input_tokens_seen": 13902736,
16789
  "step": 10200
16790
  },
 
17111
  {
17112
  "epoch": 32.09891808346213,
17113
  "eval_loss": 1.0477303266525269,
17114
+ "eval_runtime": 11.2927,
17115
+ "eval_samples_per_second": 50.918,
17116
+ "eval_steps_per_second": 12.752,
17117
  "num_input_tokens_seen": 14174192,
17118
  "step": 10400
17119
  },
 
17440
  {
17441
  "epoch": 32.717156105100464,
17442
  "eval_loss": 1.14347243309021,
17443
+ "eval_runtime": 11.3094,
17444
+ "eval_samples_per_second": 50.843,
17445
+ "eval_steps_per_second": 12.733,
17446
  "num_input_tokens_seen": 14448176,
17447
  "step": 10600
17448
  },
 
17769
  {
17770
  "epoch": 33.3338485316847,
17771
  "eval_loss": 1.0730254650115967,
17772
+ "eval_runtime": 11.3076,
17773
+ "eval_samples_per_second": 50.851,
17774
+ "eval_steps_per_second": 12.735,
17775
  "num_input_tokens_seen": 14718096,
17776
  "step": 10800
17777
  },
 
18098
  {
18099
  "epoch": 33.95208655332303,
18100
  "eval_loss": 1.0351147651672363,
18101
+ "eval_runtime": 11.2934,
18102
+ "eval_samples_per_second": 50.915,
18103
+ "eval_steps_per_second": 12.751,
18104
  "num_input_tokens_seen": 14992048,
18105
  "step": 11000
18106
  },
 
18427
  {
18428
  "epoch": 34.568778979907265,
18429
  "eval_loss": 1.1394553184509277,
18430
+ "eval_runtime": 11.3011,
18431
+ "eval_samples_per_second": 50.88,
18432
+ "eval_steps_per_second": 12.742,
18433
  "num_input_tokens_seen": 15265072,
18434
  "step": 11200
18435
  },
 
18756
  {
18757
  "epoch": 35.1854714064915,
18758
  "eval_loss": 1.1201566457748413,
18759
+ "eval_runtime": 11.2937,
18760
+ "eval_samples_per_second": 50.913,
18761
+ "eval_steps_per_second": 12.75,
18762
  "num_input_tokens_seen": 15538960,
18763
  "step": 11400
18764
  },
 
19085
  {
19086
  "epoch": 35.80370942812983,
19087
  "eval_loss": 1.133685827255249,
19088
+ "eval_runtime": 11.2974,
19089
+ "eval_samples_per_second": 50.897,
19090
+ "eval_steps_per_second": 12.746,
19091
  "num_input_tokens_seen": 15812880,
19092
  "step": 11600
19093
  },
 
19414
  {
19415
  "epoch": 36.420401854714065,
19416
  "eval_loss": 1.1776589155197144,
19417
+ "eval_runtime": 11.3188,
19418
+ "eval_samples_per_second": 50.8,
19419
+ "eval_steps_per_second": 12.722,
19420
  "num_input_tokens_seen": 16082608,
19421
  "step": 11800
19422
  },
 
19743
  {
19744
  "epoch": 37.0370942812983,
19745
  "eval_loss": 1.18972909450531,
19746
+ "eval_runtime": 11.3021,
19747
+ "eval_samples_per_second": 50.875,
19748
+ "eval_steps_per_second": 12.741,
19749
  "num_input_tokens_seen": 16357888,
19750
  "step": 12000
19751
  },
 
20072
  {
20073
  "epoch": 37.65533230293663,
20074
  "eval_loss": 1.2221449613571167,
20075
+ "eval_runtime": 11.2926,
20076
+ "eval_samples_per_second": 50.918,
20077
+ "eval_steps_per_second": 12.752,
20078
  "num_input_tokens_seen": 16627872,
20079
  "step": 12200
20080
  },
 
20401
  {
20402
  "epoch": 38.272024729520865,
20403
  "eval_loss": 1.169758677482605,
20404
+ "eval_runtime": 11.2954,
20405
+ "eval_samples_per_second": 50.906,
20406
+ "eval_steps_per_second": 12.749,
20407
  "num_input_tokens_seen": 16900336,
20408
  "step": 12400
20409
  },
 
20730
  {
20731
  "epoch": 38.8902627511592,
20732
  "eval_loss": 1.1674479246139526,
20733
+ "eval_runtime": 11.3053,
20734
+ "eval_samples_per_second": 50.861,
20735
+ "eval_steps_per_second": 12.737,
20736
  "num_input_tokens_seen": 17175024,
20737
  "step": 12600
20738
  },
 
21059
  {
21060
  "epoch": 39.50695517774343,
21061
  "eval_loss": 1.1664071083068848,
21062
+ "eval_runtime": 11.2986,
21063
+ "eval_samples_per_second": 50.891,
21064
+ "eval_steps_per_second": 12.745,
21065
  "num_input_tokens_seen": 17446864,
21066
  "step": 12800
21067
  },
 
21388
  {
21389
  "epoch": 40.123647604327665,
21390
  "eval_loss": 1.2493196725845337,
21391
+ "eval_runtime": 11.2977,
21392
+ "eval_samples_per_second": 50.895,
21393
+ "eval_steps_per_second": 12.746,
21394
  "num_input_tokens_seen": 17716560,
21395
  "step": 13000
21396
  },
 
21717
  {
21718
  "epoch": 40.741885625966,
21719
  "eval_loss": 1.3186978101730347,
21720
+ "eval_runtime": 11.3068,
21721
+ "eval_samples_per_second": 50.855,
21722
+ "eval_steps_per_second": 12.736,
21723
  "num_input_tokens_seen": 17991792,
21724
  "step": 13200
21725
  },
 
22046
  {
22047
  "epoch": 41.35857805255023,
22048
  "eval_loss": 1.256793737411499,
22049
+ "eval_runtime": 11.299,
22050
+ "eval_samples_per_second": 50.889,
22051
+ "eval_steps_per_second": 12.744,
22052
  "num_input_tokens_seen": 18262992,
22053
  "step": 13400
22054
  },
 
22375
  {
22376
  "epoch": 41.97681607418856,
22377
  "eval_loss": 1.2447845935821533,
22378
+ "eval_runtime": 11.3003,
22379
+ "eval_samples_per_second": 50.884,
22380
+ "eval_steps_per_second": 12.743,
22381
  "num_input_tokens_seen": 18536880,
22382
  "step": 13600
22383
  },
 
22704
  {
22705
  "epoch": 42.5935085007728,
22706
  "eval_loss": 1.233655333518982,
22707
+ "eval_runtime": 11.2947,
22708
+ "eval_samples_per_second": 50.909,
22709
+ "eval_steps_per_second": 12.749,
22710
  "num_input_tokens_seen": 18806784,
22711
  "step": 13800
22712
  },
 
23033
  {
23034
  "epoch": 43.210200927357036,
23035
  "eval_loss": 1.254447102546692,
23036
+ "eval_runtime": 11.3401,
23037
+ "eval_samples_per_second": 50.705,
23038
+ "eval_steps_per_second": 12.698,
23039
  "num_input_tokens_seen": 19080608,
23040
  "step": 14000
23041
  },
 
23362
  {
23363
  "epoch": 43.82843894899536,
23364
  "eval_loss": 1.3475619554519653,
23365
+ "eval_runtime": 11.3306,
23366
+ "eval_samples_per_second": 50.748,
23367
+ "eval_steps_per_second": 12.709,
23368
  "num_input_tokens_seen": 19352320,
23369
  "step": 14200
23370
  },
 
23691
  {
23692
  "epoch": 44.4451313755796,
23693
  "eval_loss": 1.2956358194351196,
23694
+ "eval_runtime": 11.3002,
23695
+ "eval_samples_per_second": 50.884,
23696
+ "eval_steps_per_second": 12.743,
23697
  "num_input_tokens_seen": 19624544,
23698
  "step": 14400
23699
  },
 
24020
  {
24021
  "epoch": 45.061823802163836,
24022
  "eval_loss": 1.214294195175171,
24023
+ "eval_runtime": 11.3055,
24024
+ "eval_samples_per_second": 50.86,
24025
+ "eval_steps_per_second": 12.737,
24026
  "num_input_tokens_seen": 19896064,
24027
  "step": 14600
24028
  },
 
24349
  {
24350
  "epoch": 45.68006182380216,
24351
  "eval_loss": 1.200486660003662,
24352
+ "eval_runtime": 11.2896,
24353
+ "eval_samples_per_second": 50.932,
24354
+ "eval_steps_per_second": 12.755,
24355
  "num_input_tokens_seen": 20168064,
24356
  "step": 14800
24357
  },
 
24678
  {
24679
  "epoch": 46.2967542503864,
24680
  "eval_loss": 1.3230748176574707,
24681
+ "eval_runtime": 11.2946,
24682
+ "eval_samples_per_second": 50.909,
24683
+ "eval_steps_per_second": 12.749,
24684
  "num_input_tokens_seen": 20440208,
24685
  "step": 15000
24686
  },
 
25007
  {
25008
  "epoch": 46.914992272024726,
25009
  "eval_loss": 1.2638696432113647,
25010
+ "eval_runtime": 11.3107,
25011
+ "eval_samples_per_second": 50.837,
25012
+ "eval_steps_per_second": 12.731,
25013
  "num_input_tokens_seen": 20713296,
25014
  "step": 15200
25015
  },
 
25336
  {
25337
  "epoch": 47.53168469860896,
25338
  "eval_loss": 1.3379755020141602,
25339
+ "eval_runtime": 11.306,
25340
+ "eval_samples_per_second": 50.858,
25341
+ "eval_steps_per_second": 12.737,
25342
  "num_input_tokens_seen": 20985744,
25343
  "step": 15400
25344
  },
 
25665
  {
25666
  "epoch": 48.1483771251932,
25667
  "eval_loss": 1.2503776550292969,
25668
+ "eval_runtime": 11.2966,
25669
+ "eval_samples_per_second": 50.9,
25670
+ "eval_steps_per_second": 12.747,
25671
  "num_input_tokens_seen": 21257920,
25672
  "step": 15600
25673
  },
 
25994
  {
25995
  "epoch": 48.76661514683153,
25996
  "eval_loss": 1.2862586975097656,
25997
+ "eval_runtime": 11.3031,
25998
+ "eval_samples_per_second": 50.871,
25999
+ "eval_steps_per_second": 12.74,
26000
  "num_input_tokens_seen": 21529248,
26001
  "step": 15800
26002
  },
 
26323
  {
26324
  "epoch": 49.38330757341576,
26325
  "eval_loss": 1.312309741973877,
26326
+ "eval_runtime": 11.2944,
26327
+ "eval_samples_per_second": 50.91,
26328
+ "eval_steps_per_second": 12.75,
26329
  "num_input_tokens_seen": 21800992,
26330
  "step": 16000
26331
  },
 
26652
  {
26653
  "epoch": 50.0,
26654
  "eval_loss": 1.2966762781143188,
26655
+ "eval_runtime": 11.2763,
26656
+ "eval_samples_per_second": 50.992,
26657
+ "eval_steps_per_second": 12.77,
26658
  "num_input_tokens_seen": 22073392,
26659
  "step": 16200
26660
  },
 
26981
  {
26982
  "epoch": 50.618238021638334,
26983
  "eval_loss": 1.3632538318634033,
26984
+ "eval_runtime": 11.3003,
26985
+ "eval_samples_per_second": 50.884,
26986
+ "eval_steps_per_second": 12.743,
26987
  "num_input_tokens_seen": 22345648,
26988
  "step": 16400
26989
  },
 
27310
  {
27311
  "epoch": 51.23493044822256,
27312
  "eval_loss": 1.3670175075531006,
27313
+ "eval_runtime": 11.296,
27314
+ "eval_samples_per_second": 50.903,
27315
+ "eval_steps_per_second": 12.748,
27316
  "num_input_tokens_seen": 22617984,
27317
  "step": 16600
27318
  },
 
27639
  {
27640
  "epoch": 51.8531684698609,
27641
  "eval_loss": 1.3320527076721191,
27642
+ "eval_runtime": 11.3009,
27643
+ "eval_samples_per_second": 50.881,
27644
+ "eval_steps_per_second": 12.742,
27645
  "num_input_tokens_seen": 22892544,
27646
  "step": 16800
27647
  },
 
27968
  {
27969
  "epoch": 52.469860896445134,
27970
  "eval_loss": 1.430206537246704,
27971
+ "eval_runtime": 11.3085,
27972
+ "eval_samples_per_second": 50.847,
27973
+ "eval_steps_per_second": 12.734,
27974
  "num_input_tokens_seen": 23163488,
27975
  "step": 17000
27976
  },
 
28297
  {
28298
  "epoch": 53.086553323029364,
28299
  "eval_loss": 1.321289300918579,
28300
+ "eval_runtime": 11.3024,
28301
+ "eval_samples_per_second": 50.874,
28302
+ "eval_steps_per_second": 12.741,
28303
  "num_input_tokens_seen": 23438320,
28304
  "step": 17200
28305
  },
 
28626
  {
28627
  "epoch": 53.7047913446677,
28628
  "eval_loss": 1.4570552110671997,
28629
+ "eval_runtime": 11.3064,
28630
+ "eval_samples_per_second": 50.856,
28631
+ "eval_steps_per_second": 12.736,
28632
  "num_input_tokens_seen": 23708720,
28633
  "step": 17400
28634
  },
 
28955
  {
28956
  "epoch": 54.321483771251934,
28957
  "eval_loss": 1.3873727321624756,
28958
+ "eval_runtime": 11.2922,
28959
+ "eval_samples_per_second": 50.92,
28960
+ "eval_steps_per_second": 12.752,
28961
  "num_input_tokens_seen": 23984304,
28962
  "step": 17600
28963
  },
 
29284
  {
29285
  "epoch": 54.93972179289026,
29286
  "eval_loss": 1.4202662706375122,
29287
+ "eval_runtime": 11.3232,
29288
+ "eval_samples_per_second": 50.781,
29289
+ "eval_steps_per_second": 12.717,
29290
  "num_input_tokens_seen": 24256368,
29291
  "step": 17800
29292
  },
 
29613
  {
29614
  "epoch": 55.5564142194745,
29615
  "eval_loss": 1.3830780982971191,
29616
+ "eval_runtime": 11.3013,
29617
+ "eval_samples_per_second": 50.879,
29618
+ "eval_steps_per_second": 12.742,
29619
  "num_input_tokens_seen": 24527040,
29620
  "step": 18000
29621
  },
 
29942
  {
29943
  "epoch": 56.173106646058734,
29944
  "eval_loss": 1.4858934879302979,
29945
+ "eval_runtime": 11.2884,
29946
+ "eval_samples_per_second": 50.937,
29947
+ "eval_steps_per_second": 12.756,
29948
  "num_input_tokens_seen": 24799312,
29949
  "step": 18200
29950
  },
 
30271
  {
30272
  "epoch": 56.79134466769706,
30273
  "eval_loss": 1.5054408311843872,
30274
+ "eval_runtime": 11.2984,
30275
+ "eval_samples_per_second": 50.892,
30276
+ "eval_steps_per_second": 12.745,
30277
  "num_input_tokens_seen": 25072848,
30278
  "step": 18400
30279
  },
 
30600
  {
30601
  "epoch": 57.4080370942813,
30602
  "eval_loss": 1.473268747329712,
30603
+ "eval_runtime": 11.3001,
30604
+ "eval_samples_per_second": 50.884,
30605
+ "eval_steps_per_second": 12.743,
30606
  "num_input_tokens_seen": 25347056,
30607
  "step": 18600
30608
  },
 
30929
  {
30930
  "epoch": 58.024729520865534,
30931
  "eval_loss": 1.5095571279525757,
30932
+ "eval_runtime": 11.2966,
30933
+ "eval_samples_per_second": 50.9,
30934
+ "eval_steps_per_second": 12.747,
30935
  "num_input_tokens_seen": 25618400,
30936
  "step": 18800
30937
  },
 
31258
  {
31259
  "epoch": 58.64296754250386,
31260
  "eval_loss": 1.3225481510162354,
31261
+ "eval_runtime": 11.3183,
31262
+ "eval_samples_per_second": 50.803,
31263
+ "eval_steps_per_second": 12.723,
31264
  "num_input_tokens_seen": 25892960,
31265
  "step": 19000
31266
  },
 
31587
  {
31588
  "epoch": 59.2596599690881,
31589
  "eval_loss": 1.4172106981277466,
31590
+ "eval_runtime": 11.3028,
31591
+ "eval_samples_per_second": 50.872,
31592
+ "eval_steps_per_second": 12.74,
31593
  "num_input_tokens_seen": 26164688,
31594
  "step": 19200
31595
  },
 
31916
  {
31917
  "epoch": 59.87789799072643,
31918
  "eval_loss": 1.35789155960083,
31919
+ "eval_runtime": 11.2938,
31920
+ "eval_samples_per_second": 50.913,
31921
+ "eval_steps_per_second": 12.75,
31922
  "num_input_tokens_seen": 26437392,
31923
  "step": 19400
31924
  },
 
32245
  {
32246
  "epoch": 60.49459041731066,
32247
  "eval_loss": 1.406263828277588,
32248
+ "eval_runtime": 11.2941,
32249
+ "eval_samples_per_second": 50.911,
32250
+ "eval_steps_per_second": 12.75,
32251
  "num_input_tokens_seen": 26710176,
32252
  "step": 19600
32253
  },
 
32574
  {
32575
  "epoch": 61.1112828438949,
32576
  "eval_loss": 1.4265893697738647,
32577
+ "eval_runtime": 11.3038,
32578
+ "eval_samples_per_second": 50.868,
32579
+ "eval_steps_per_second": 12.739,
32580
  "num_input_tokens_seen": 26981728,
32581
  "step": 19800
32582
  },
 
32903
  {
32904
  "epoch": 61.72952086553323,
32905
  "eval_loss": 1.3551362752914429,
32906
+ "eval_runtime": 11.2955,
32907
+ "eval_samples_per_second": 50.905,
32908
+ "eval_steps_per_second": 12.748,
32909
  "num_input_tokens_seen": 27253632,
32910
  "step": 20000
32911
  },
 
33232
  {
33233
  "epoch": 62.34621329211747,
33234
  "eval_loss": 1.4743679761886597,
33235
+ "eval_runtime": 11.2915,
33236
+ "eval_samples_per_second": 50.923,
33237
+ "eval_steps_per_second": 12.753,
33238
  "num_input_tokens_seen": 27524928,
33239
  "step": 20200
33240
  },
 
33561
  {
33562
  "epoch": 62.964451313755795,
33563
  "eval_loss": 1.5115978717803955,
33564
+ "eval_runtime": 11.314,
33565
+ "eval_samples_per_second": 50.822,
33566
+ "eval_steps_per_second": 12.728,
33567
  "num_input_tokens_seen": 27799712,
33568
  "step": 20400
33569
  },
 
33890
  {
33891
  "epoch": 63.58114374034003,
33892
  "eval_loss": 1.5977118015289307,
33893
+ "eval_runtime": 11.3135,
33894
+ "eval_samples_per_second": 50.824,
33895
+ "eval_steps_per_second": 12.728,
33896
  "num_input_tokens_seen": 28071024,
33897
  "step": 20600
33898
  },
 
34219
  {
34220
  "epoch": 64.19783616692426,
34221
  "eval_loss": 1.5763089656829834,
34222
+ "eval_runtime": 11.2999,
34223
+ "eval_samples_per_second": 50.885,
34224
+ "eval_steps_per_second": 12.743,
34225
  "num_input_tokens_seen": 28342880,
34226
  "step": 20800
34227
  },
 
34548
  {
34549
  "epoch": 64.8160741885626,
34550
  "eval_loss": 1.6289054155349731,
34551
+ "eval_runtime": 11.3101,
34552
+ "eval_samples_per_second": 50.839,
34553
+ "eval_steps_per_second": 12.732,
34554
  "num_input_tokens_seen": 28617696,
34555
  "step": 21000
34556
  },
 
34877
  {
34878
  "epoch": 65.43276661514683,
34879
  "eval_loss": 1.6688075065612793,
34880
+ "eval_runtime": 11.304,
34881
+ "eval_samples_per_second": 50.867,
34882
+ "eval_steps_per_second": 12.739,
34883
  "num_input_tokens_seen": 28888112,
34884
  "step": 21200
34885
  },
 
35206
  {
35207
  "epoch": 66.04945904173107,
35208
  "eval_loss": 1.6155662536621094,
35209
+ "eval_runtime": 11.3029,
35210
+ "eval_samples_per_second": 50.872,
35211
+ "eval_steps_per_second": 12.74,
35212
  "num_input_tokens_seen": 29162944,
35213
  "step": 21400
35214
  },
 
35535
  {
35536
  "epoch": 66.6676970633694,
35537
  "eval_loss": 1.6828913688659668,
35538
+ "eval_runtime": 11.2964,
35539
+ "eval_samples_per_second": 50.901,
35540
+ "eval_steps_per_second": 12.747,
35541
  "num_input_tokens_seen": 29434784,
35542
  "step": 21600
35543
  },
 
35864
  {
35865
  "epoch": 67.28438948995363,
35866
  "eval_loss": 1.6700409650802612,
35867
+ "eval_runtime": 11.3083,
35868
+ "eval_samples_per_second": 50.847,
35869
+ "eval_steps_per_second": 12.734,
35870
  "num_input_tokens_seen": 29706800,
35871
  "step": 21800
35872
  },
 
36193
  {
36194
  "epoch": 67.90262751159196,
36195
  "eval_loss": 1.6916232109069824,
36196
+ "eval_runtime": 11.2968,
36197
+ "eval_samples_per_second": 50.899,
36198
+ "eval_steps_per_second": 12.747,
36199
  "num_input_tokens_seen": 29980240,
36200
  "step": 22000
36201
  },
 
36522
  {
36523
  "epoch": 68.5193199381762,
36524
  "eval_loss": 1.7332632541656494,
36525
+ "eval_runtime": 11.2865,
36526
+ "eval_samples_per_second": 50.946,
36527
+ "eval_steps_per_second": 12.759,
36528
  "num_input_tokens_seen": 30250192,
36529
  "step": 22200
36530
  },
 
36851
  {
36852
  "epoch": 69.13601236476043,
36853
  "eval_loss": 1.7388529777526855,
36854
+ "eval_runtime": 11.3123,
36855
+ "eval_samples_per_second": 50.83,
36856
+ "eval_steps_per_second": 12.73,
36857
  "num_input_tokens_seen": 30522672,
36858
  "step": 22400
36859
  },
 
37180
  {
37181
  "epoch": 69.75425038639877,
37182
  "eval_loss": 1.7202584743499756,
37183
+ "eval_runtime": 11.2903,
37184
+ "eval_samples_per_second": 50.928,
37185
+ "eval_steps_per_second": 12.754,
37186
  "num_input_tokens_seen": 30795024,
37187
  "step": 22600
37188
  },
 
37509
  {
37510
  "epoch": 70.370942812983,
37511
  "eval_loss": 1.7700324058532715,
37512
+ "eval_runtime": 11.3014,
37513
+ "eval_samples_per_second": 50.878,
37514
+ "eval_steps_per_second": 12.742,
37515
  "num_input_tokens_seen": 31066544,
37516
  "step": 22800
37517
  },
 
37838
  {
37839
  "epoch": 70.98918083462132,
37840
  "eval_loss": 1.769662857055664,
37841
+ "eval_runtime": 11.3053,
37842
+ "eval_samples_per_second": 50.861,
37843
+ "eval_steps_per_second": 12.737,
37844
  "num_input_tokens_seen": 31338128,
37845
  "step": 23000
37846
  },
 
38167
  {
38168
  "epoch": 71.60587326120556,
38169
  "eval_loss": 1.8099125623703003,
38170
+ "eval_runtime": 11.297,
38171
+ "eval_samples_per_second": 50.898,
38172
+ "eval_steps_per_second": 12.747,
38173
  "num_input_tokens_seen": 31609104,
38174
  "step": 23200
38175
  },
 
38496
  {
38497
  "epoch": 72.2225656877898,
38498
  "eval_loss": 1.856191873550415,
38499
+ "eval_runtime": 11.2897,
38500
+ "eval_samples_per_second": 50.931,
38501
+ "eval_steps_per_second": 12.755,
38502
  "num_input_tokens_seen": 31881424,
38503
  "step": 23400
38504
  },
 
38825
  {
38826
  "epoch": 72.84080370942813,
38827
  "eval_loss": 1.7837176322937012,
38828
+ "eval_runtime": 11.3065,
38829
+ "eval_samples_per_second": 50.856,
38830
+ "eval_steps_per_second": 12.736,
38831
  "num_input_tokens_seen": 32155024,
38832
  "step": 23600
38833
  },
 
39154
  {
39155
  "epoch": 73.45749613601237,
39156
  "eval_loss": 1.8125648498535156,
39157
+ "eval_runtime": 11.2948,
39158
+ "eval_samples_per_second": 50.909,
39159
+ "eval_steps_per_second": 12.749,
39160
  "num_input_tokens_seen": 32425312,
39161
  "step": 23800
39162
  },
 
39483
  {
39484
  "epoch": 74.0741885625966,
39485
  "eval_loss": 1.8575142621994019,
39486
+ "eval_runtime": 11.2926,
39487
+ "eval_samples_per_second": 50.918,
39488
+ "eval_steps_per_second": 12.752,
39489
  "num_input_tokens_seen": 32698784,
39490
  "step": 24000
39491
  },
 
39812
  {
39813
  "epoch": 74.69242658423494,
39814
  "eval_loss": 1.8753187656402588,
39815
+ "eval_runtime": 11.2917,
39816
+ "eval_samples_per_second": 50.922,
39817
+ "eval_steps_per_second": 12.753,
39818
  "num_input_tokens_seen": 32974144,
39819
  "step": 24200
39820
  },
 
40141
  {
40142
  "epoch": 75.30911901081916,
40143
  "eval_loss": 1.9167370796203613,
40144
+ "eval_runtime": 11.3077,
40145
+ "eval_samples_per_second": 50.85,
40146
+ "eval_steps_per_second": 12.735,
40147
  "num_input_tokens_seen": 33245216,
40148
  "step": 24400
40149
  },
 
40470
  {
40471
  "epoch": 75.9273570324575,
40472
  "eval_loss": 1.1968048810958862,
40473
+ "eval_runtime": 11.3303,
40474
+ "eval_samples_per_second": 50.749,
40475
+ "eval_steps_per_second": 12.709,
40476
  "num_input_tokens_seen": 33517088,
40477
  "step": 24600
40478
  },
 
40799
  {
40800
  "epoch": 76.54404945904173,
40801
  "eval_loss": 1.3781951665878296,
40802
+ "eval_runtime": 11.2957,
40803
+ "eval_samples_per_second": 50.904,
40804
+ "eval_steps_per_second": 12.748,
40805
  "num_input_tokens_seen": 33788432,
40806
  "step": 24800
40807
  },
 
41128
  {
41129
  "epoch": 77.16074188562597,
41130
  "eval_loss": 1.5010449886322021,
41131
+ "eval_runtime": 11.327,
41132
+ "eval_samples_per_second": 50.763,
41133
+ "eval_steps_per_second": 12.713,
41134
  "num_input_tokens_seen": 34060416,
41135
  "step": 25000
41136
  },
 
41457
  {
41458
  "epoch": 77.7789799072643,
41459
  "eval_loss": 1.5149627923965454,
41460
+ "eval_runtime": 11.2979,
41461
+ "eval_samples_per_second": 50.894,
41462
+ "eval_steps_per_second": 12.746,
41463
  "num_input_tokens_seen": 34333408,
41464
  "step": 25200
41465
  },
 
41786
  {
41787
  "epoch": 78.39567233384854,
41788
  "eval_loss": 1.6160272359848022,
41789
+ "eval_runtime": 11.2891,
41790
+ "eval_samples_per_second": 50.934,
41791
+ "eval_steps_per_second": 12.756,
41792
  "num_input_tokens_seen": 34605392,
41793
  "step": 25400
41794
  },
 
42115
  {
42116
  "epoch": 79.01236476043276,
42117
  "eval_loss": 1.5819573402404785,
42118
+ "eval_runtime": 11.3204,
42119
+ "eval_samples_per_second": 50.793,
42120
+ "eval_steps_per_second": 12.72,
42121
  "num_input_tokens_seen": 34879536,
42122
  "step": 25600
42123
  },
 
42444
  {
42445
  "epoch": 79.6306027820711,
42446
  "eval_loss": 1.651304841041565,
42447
+ "eval_runtime": 11.2935,
42448
+ "eval_samples_per_second": 50.914,
42449
+ "eval_steps_per_second": 12.751,
42450
  "num_input_tokens_seen": 35153488,
42451
  "step": 25800
42452
  },
 
42773
  {
42774
  "epoch": 80.24729520865533,
42775
  "eval_loss": 1.6964157819747925,
42776
+ "eval_runtime": 11.2883,
42777
+ "eval_samples_per_second": 50.937,
42778
+ "eval_steps_per_second": 12.757,
42779
  "num_input_tokens_seen": 35424912,
42780
  "step": 26000
42781
  },
 
43102
  {
43103
  "epoch": 80.86553323029366,
43104
  "eval_loss": 1.7483088970184326,
43105
+ "eval_runtime": 11.3043,
43106
+ "eval_samples_per_second": 50.865,
43107
+ "eval_steps_per_second": 12.738,
43108
  "num_input_tokens_seen": 35698064,
43109
  "step": 26200
43110
  },
 
43431
  {
43432
  "epoch": 81.4822256568779,
43433
  "eval_loss": 1.7370902299880981,
43434
+ "eval_runtime": 11.2996,
43435
+ "eval_samples_per_second": 50.887,
43436
+ "eval_steps_per_second": 12.744,
43437
  "num_input_tokens_seen": 35968160,
43438
  "step": 26400
43439
  },
 
43760
  {
43761
  "epoch": 82.09891808346214,
43762
  "eval_loss": 1.7790963649749756,
43763
+ "eval_runtime": 11.2991,
43764
+ "eval_samples_per_second": 50.889,
43765
+ "eval_steps_per_second": 12.744,
43766
  "num_input_tokens_seen": 36240928,
43767
  "step": 26600
43768
  },
 
44089
  {
44090
  "epoch": 82.71715610510046,
44091
  "eval_loss": 1.772797703742981,
44092
+ "eval_runtime": 11.2933,
44093
+ "eval_samples_per_second": 50.915,
44094
+ "eval_steps_per_second": 12.751,
44095
  "num_input_tokens_seen": 36514208,
44096
  "step": 26800
44097
  },
 
44418
  {
44419
  "epoch": 83.3338485316847,
44420
  "eval_loss": 1.7722996473312378,
44421
+ "eval_runtime": 11.3164,
44422
+ "eval_samples_per_second": 50.811,
44423
+ "eval_steps_per_second": 12.725,
44424
  "num_input_tokens_seen": 36785136,
44425
  "step": 27000
44426
  },
 
44747
  {
44748
  "epoch": 83.95208655332303,
44749
  "eval_loss": 1.8002300262451172,
44750
+ "eval_runtime": 11.2936,
44751
+ "eval_samples_per_second": 50.914,
44752
+ "eval_steps_per_second": 12.751,
44753
  "num_input_tokens_seen": 37061648,
44754
  "step": 27200
44755
  },
 
45076
  {
45077
  "epoch": 84.56877897990726,
45078
  "eval_loss": 1.804320216178894,
45079
+ "eval_runtime": 11.2862,
45080
+ "eval_samples_per_second": 50.947,
45081
+ "eval_steps_per_second": 12.759,
45082
  "num_input_tokens_seen": 37333648,
45083
  "step": 27400
45084
  },
 
45405
  {
45406
  "epoch": 85.1854714064915,
45407
  "eval_loss": 1.8354555368423462,
45408
+ "eval_runtime": 11.3126,
45409
+ "eval_samples_per_second": 50.828,
45410
+ "eval_steps_per_second": 12.729,
45411
  "num_input_tokens_seen": 37605184,
45412
  "step": 27600
45413
  },
 
45734
  {
45735
  "epoch": 85.80370942812984,
45736
  "eval_loss": 1.8400607109069824,
45737
+ "eval_runtime": 11.2893,
45738
+ "eval_samples_per_second": 50.933,
45739
+ "eval_steps_per_second": 12.755,
45740
  "num_input_tokens_seen": 37875360,
45741
  "step": 27800
45742
  },
 
46063
  {
46064
  "epoch": 86.42040185471407,
46065
  "eval_loss": 1.8688201904296875,
46066
+ "eval_runtime": 11.2959,
46067
+ "eval_samples_per_second": 50.904,
46068
+ "eval_steps_per_second": 12.748,
46069
  "num_input_tokens_seen": 38150208,
46070
  "step": 28000
46071
  },
 
46392
  {
46393
  "epoch": 87.0370942812983,
46394
  "eval_loss": 1.810387134552002,
46395
+ "eval_runtime": 11.3075,
46396
+ "eval_samples_per_second": 50.851,
46397
+ "eval_steps_per_second": 12.735,
46398
  "num_input_tokens_seen": 38422048,
46399
  "step": 28200
46400
  },
 
46721
  {
46722
  "epoch": 87.65533230293663,
46723
  "eval_loss": 1.8730015754699707,
46724
+ "eval_runtime": 11.2946,
46725
+ "eval_samples_per_second": 50.909,
46726
+ "eval_steps_per_second": 12.749,
46727
  "num_input_tokens_seen": 38692224,
46728
  "step": 28400
46729
  },
 
47050
  {
47051
  "epoch": 88.27202472952087,
47052
  "eval_loss": 1.8786824941635132,
47053
+ "eval_runtime": 11.2989,
47054
+ "eval_samples_per_second": 50.89,
47055
+ "eval_steps_per_second": 12.745,
47056
  "num_input_tokens_seen": 38964176,
47057
  "step": 28600
47058
  },
 
47379
  {
47380
  "epoch": 88.8902627511592,
47381
  "eval_loss": 1.8849008083343506,
47382
+ "eval_runtime": 11.2971,
47383
+ "eval_samples_per_second": 50.898,
47384
+ "eval_steps_per_second": 12.747,
47385
  "num_input_tokens_seen": 39235184,
47386
  "step": 28800
47387
  },
 
47708
  {
47709
  "epoch": 89.50695517774344,
47710
  "eval_loss": 1.9232840538024902,
47711
+ "eval_runtime": 11.3151,
47712
+ "eval_samples_per_second": 50.817,
47713
+ "eval_steps_per_second": 12.726,
47714
  "num_input_tokens_seen": 39507520,
47715
  "step": 29000
47716
  },
 
48037
  {
48038
  "epoch": 90.12364760432767,
48039
  "eval_loss": 1.9127227067947388,
48040
+ "eval_runtime": 11.3026,
48041
+ "eval_samples_per_second": 50.873,
48042
+ "eval_steps_per_second": 12.74,
48043
  "num_input_tokens_seen": 39779328,
48044
  "step": 29200
48045
  },
 
48366
  {
48367
  "epoch": 90.74188562596599,
48368
  "eval_loss": 1.8981382846832275,
48369
+ "eval_runtime": 11.2948,
48370
+ "eval_samples_per_second": 50.908,
48371
+ "eval_steps_per_second": 12.749,
48372
  "num_input_tokens_seen": 40051520,
48373
  "step": 29400
48374
  },
 
48695
  {
48696
  "epoch": 91.35857805255023,
48697
  "eval_loss": 1.9302953481674194,
48698
+ "eval_runtime": 11.3008,
48699
+ "eval_samples_per_second": 50.881,
48700
+ "eval_steps_per_second": 12.742,
48701
  "num_input_tokens_seen": 40322576,
48702
  "step": 29600
48703
  },
 
49024
  {
49025
  "epoch": 91.97681607418856,
49026
  "eval_loss": 1.9179975986480713,
49027
+ "eval_runtime": 11.2952,
49028
+ "eval_samples_per_second": 50.907,
49029
+ "eval_steps_per_second": 12.749,
49030
  "num_input_tokens_seen": 40596016,
49031
  "step": 29800
49032
  },
 
49353
  {
49354
  "epoch": 92.5935085007728,
49355
  "eval_loss": 1.92044997215271,
49356
+ "eval_runtime": 11.297,
49357
+ "eval_samples_per_second": 50.898,
49358
+ "eval_steps_per_second": 12.747,
49359
  "num_input_tokens_seen": 40867568,
49360
  "step": 30000
49361
  },
 
49682
  {
49683
  "epoch": 93.21020092735704,
49684
  "eval_loss": 1.9711647033691406,
49685
+ "eval_runtime": 11.3063,
49686
+ "eval_samples_per_second": 50.857,
49687
+ "eval_steps_per_second": 12.736,
49688
  "num_input_tokens_seen": 41140848,
49689
  "step": 30200
49690
  },
 
50011
  {
50012
  "epoch": 93.82843894899537,
50013
  "eval_loss": 1.976061463356018,
50014
+ "eval_runtime": 11.2906,
50015
+ "eval_samples_per_second": 50.927,
50016
+ "eval_steps_per_second": 12.754,
50017
  "num_input_tokens_seen": 41412848,
50018
  "step": 30400
50019
  },
 
50340
  {
50341
  "epoch": 94.44513137557959,
50342
  "eval_loss": 1.958508849143982,
50343
+ "eval_runtime": 11.2928,
50344
+ "eval_samples_per_second": 50.917,
50345
+ "eval_steps_per_second": 12.751,
50346
  "num_input_tokens_seen": 41683920,
50347
  "step": 30600
50348
  },
 
50669
  {
50670
  "epoch": 95.06182380216383,
50671
  "eval_loss": 1.9966574907302856,
50672
+ "eval_runtime": 11.302,
50673
+ "eval_samples_per_second": 50.876,
50674
+ "eval_steps_per_second": 12.741,
50675
  "num_input_tokens_seen": 41959008,
50676
  "step": 30800
50677
  },
 
50998
  {
50999
  "epoch": 95.68006182380216,
51000
  "eval_loss": 1.9950237274169922,
51001
+ "eval_runtime": 11.3056,
51002
+ "eval_samples_per_second": 50.86,
51003
+ "eval_steps_per_second": 12.737,
51004
  "num_input_tokens_seen": 42231520,
51005
  "step": 31000
51006
  },
 
51327
  {
51328
  "epoch": 96.2967542503864,
51329
  "eval_loss": 1.9839365482330322,
51330
+ "eval_runtime": 11.293,
51331
+ "eval_samples_per_second": 50.916,
51332
+ "eval_steps_per_second": 12.751,
51333
  "num_input_tokens_seen": 42502416,
51334
  "step": 31200
51335
  },
 
51656
  {
51657
  "epoch": 96.91499227202473,
51658
  "eval_loss": 2.004136323928833,
51659
+ "eval_runtime": 11.2961,
51660
+ "eval_samples_per_second": 50.902,
51661
+ "eval_steps_per_second": 12.748,
51662
  "num_input_tokens_seen": 42776304,
51663
  "step": 31400
51664
  },
 
51985
  {
51986
  "epoch": 97.53168469860897,
51987
  "eval_loss": 2.016206979751587,
51988
+ "eval_runtime": 11.3124,
51989
+ "eval_samples_per_second": 50.829,
51990
+ "eval_steps_per_second": 12.729,
51991
  "num_input_tokens_seen": 43048176,
51992
  "step": 31600
51993
  },
 
52314
  {
52315
  "epoch": 98.14837712519319,
52316
  "eval_loss": 2.010310173034668,
52317
+ "eval_runtime": 11.3207,
52318
+ "eval_samples_per_second": 50.792,
52319
+ "eval_steps_per_second": 12.72,
52320
  "num_input_tokens_seen": 43320144,
52321
  "step": 31800
52322
  },
 
52643
  {
52644
  "epoch": 98.76661514683153,
52645
  "eval_loss": 2.008124589920044,
52646
+ "eval_runtime": 11.2928,
52647
+ "eval_samples_per_second": 50.917,
52648
+ "eval_steps_per_second": 12.751,
52649
  "num_input_tokens_seen": 43591728,
52650
  "step": 32000
52651
  },
 
52972
  {
52973
  "epoch": 99.38330757341576,
52974
  "eval_loss": 2.027338981628418,
52975
+ "eval_runtime": 11.3086,
52976
+ "eval_samples_per_second": 50.846,
52977
+ "eval_steps_per_second": 12.734,
52978
  "num_input_tokens_seen": 43866048,
52979
  "step": 32200
52980
  },
 
53301
  {
53302
  "epoch": 100.0,
53303
  "eval_loss": 2.034688711166382,
53304
+ "eval_runtime": 11.2867,
53305
+ "eval_samples_per_second": 50.945,
53306
+ "eval_steps_per_second": 12.758,
53307
  "num_input_tokens_seen": 44137040,
53308
  "step": 32400
53309
  },
 
53630
  {
53631
  "epoch": 100.61823802163833,
53632
  "eval_loss": 2.052443265914917,
53633
+ "eval_runtime": 11.2954,
53634
+ "eval_samples_per_second": 50.905,
53635
+ "eval_steps_per_second": 12.749,
53636
  "num_input_tokens_seen": 44408848,
53637
  "step": 32600
53638
  },
 
53959
  {
53960
  "epoch": 101.23493044822257,
53961
  "eval_loss": 2.067155599594116,
53962
+ "eval_runtime": 11.3248,
53963
+ "eval_samples_per_second": 50.774,
53964
+ "eval_steps_per_second": 12.715,
53965
  "num_input_tokens_seen": 44682912,
53966
  "step": 32800
53967
  },
 
54288
  {
54289
  "epoch": 101.85316846986089,
54290
  "eval_loss": 2.042910575866699,
54291
+ "eval_runtime": 11.2924,
54292
+ "eval_samples_per_second": 50.919,
54293
+ "eval_steps_per_second": 12.752,
54294
  "num_input_tokens_seen": 44956000,
54295
  "step": 33000
54296
  },
 
54617
  {
54618
  "epoch": 102.46986089644513,
54619
  "eval_loss": 2.049968719482422,
54620
+ "eval_runtime": 11.2991,
54621
+ "eval_samples_per_second": 50.889,
54622
+ "eval_steps_per_second": 12.744,
54623
  "num_input_tokens_seen": 45227824,
54624
  "step": 33200
54625
  },
 
54946
  {
54947
  "epoch": 103.08655332302936,
54948
  "eval_loss": 2.047577142715454,
54949
+ "eval_runtime": 11.2983,
54950
+ "eval_samples_per_second": 50.893,
54951
+ "eval_steps_per_second": 12.745,
54952
  "num_input_tokens_seen": 45498320,
54953
  "step": 33400
54954
  },
 
55275
  {
55276
  "epoch": 103.7047913446677,
55277
  "eval_loss": 2.0636227130889893,
55278
+ "eval_runtime": 11.3222,
55279
+ "eval_samples_per_second": 50.785,
55280
+ "eval_steps_per_second": 12.718,
55281
  "num_input_tokens_seen": 45773648,
55282
  "step": 33600
55283
  },
 
55604
  {
55605
  "epoch": 104.32148377125193,
55606
  "eval_loss": 2.080799102783203,
55607
+ "eval_runtime": 11.3037,
55608
+ "eval_samples_per_second": 50.868,
55609
+ "eval_steps_per_second": 12.739,
55610
  "num_input_tokens_seen": 46044128,
55611
  "step": 33800
55612
  },
 
55933
  {
55934
  "epoch": 104.93972179289027,
55935
  "eval_loss": 2.0720911026000977,
55936
+ "eval_runtime": 11.3001,
55937
+ "eval_samples_per_second": 50.885,
55938
+ "eval_steps_per_second": 12.743,
55939
  "num_input_tokens_seen": 46317504,
55940
  "step": 34000
55941
  },
 
56262
  {
56263
  "epoch": 105.5564142194745,
56264
  "eval_loss": 2.082965612411499,
56265
+ "eval_runtime": 11.312,
56266
+ "eval_samples_per_second": 50.831,
56267
+ "eval_steps_per_second": 12.73,
56268
  "num_input_tokens_seen": 46589024,
56269
  "step": 34200
56270
  },
 
56591
  {
56592
  "epoch": 106.17310664605873,
56593
  "eval_loss": 2.094463348388672,
56594
+ "eval_runtime": 11.2952,
56595
+ "eval_samples_per_second": 50.906,
56596
+ "eval_steps_per_second": 12.749,
56597
  "num_input_tokens_seen": 46863680,
56598
  "step": 34400
56599
  },
 
56920
  {
56921
  "epoch": 106.79134466769706,
56922
  "eval_loss": 2.0966665744781494,
56923
+ "eval_runtime": 11.2915,
56924
+ "eval_samples_per_second": 50.923,
56925
+ "eval_steps_per_second": 12.753,
56926
  "num_input_tokens_seen": 47135520,
56927
  "step": 34600
56928
  },
 
57249
  {
57250
  "epoch": 107.4080370942813,
57251
  "eval_loss": 2.1042280197143555,
57252
+ "eval_runtime": 11.3301,
57253
+ "eval_samples_per_second": 50.75,
57254
+ "eval_steps_per_second": 12.709,
57255
  "num_input_tokens_seen": 47407056,
57256
  "step": 34800
57257
  },
 
57578
  {
57579
  "epoch": 108.02472952086553,
57580
  "eval_loss": 2.096859931945801,
57581
+ "eval_runtime": 11.2877,
57582
+ "eval_samples_per_second": 50.94,
57583
+ "eval_steps_per_second": 12.757,
57584
  "num_input_tokens_seen": 47680112,
57585
  "step": 35000
57586
  },
 
57907
  {
57908
  "epoch": 108.64296754250387,
57909
  "eval_loss": 2.1074352264404297,
57910
+ "eval_runtime": 11.2944,
57911
+ "eval_samples_per_second": 50.91,
57912
+ "eval_steps_per_second": 12.75,
57913
  "num_input_tokens_seen": 47951632,
57914
  "step": 35200
57915
  },
 
58236
  {
58237
  "epoch": 109.2596599690881,
58238
  "eval_loss": 2.110261917114258,
58239
+ "eval_runtime": 11.3012,
58240
+ "eval_samples_per_second": 50.879,
58241
+ "eval_steps_per_second": 12.742,
58242
  "num_input_tokens_seen": 48224016,
58243
  "step": 35400
58244
  },
 
58565
  {
58566
  "epoch": 109.87789799072642,
58567
  "eval_loss": 2.1071767807006836,
58568
+ "eval_runtime": 11.3117,
58569
+ "eval_samples_per_second": 50.832,
58570
+ "eval_steps_per_second": 12.73,
58571
  "num_input_tokens_seen": 48497072,
58572
  "step": 35600
58573
  },
 
58894
  {
58895
  "epoch": 110.49459041731066,
58896
  "eval_loss": 2.1081290245056152,
58897
+ "eval_runtime": 11.2959,
58898
+ "eval_samples_per_second": 50.904,
58899
+ "eval_steps_per_second": 12.748,
58900
  "num_input_tokens_seen": 48768624,
58901
  "step": 35800
58902
  },
 
59223
  {
59224
  "epoch": 111.1112828438949,
59225
  "eval_loss": 2.1115777492523193,
59226
+ "eval_runtime": 11.2933,
59227
+ "eval_samples_per_second": 50.915,
59228
+ "eval_steps_per_second": 12.751,
59229
  "num_input_tokens_seen": 49041488,
59230
  "step": 36000
59231
  },
 
59552
  {
59553
  "epoch": 111.72952086553323,
59554
  "eval_loss": 2.1243085861206055,
59555
+ "eval_runtime": 11.3106,
59556
+ "eval_samples_per_second": 50.837,
59557
+ "eval_steps_per_second": 12.731,
59558
  "num_input_tokens_seen": 49314352,
59559
  "step": 36200
59560
  },
 
59881
  {
59882
  "epoch": 112.34621329211747,
59883
  "eval_loss": 2.1214993000030518,
59884
+ "eval_runtime": 11.297,
59885
+ "eval_samples_per_second": 50.898,
59886
+ "eval_steps_per_second": 12.747,
59887
  "num_input_tokens_seen": 49584848,
59888
  "step": 36400
59889
  },
 
60210
  {
60211
  "epoch": 112.9644513137558,
60212
  "eval_loss": 2.1198999881744385,
60213
+ "eval_runtime": 11.3061,
60214
+ "eval_samples_per_second": 50.857,
60215
+ "eval_steps_per_second": 12.736,
60216
  "num_input_tokens_seen": 49858864,
60217
  "step": 36600
60218
  },
 
60539
  {
60540
  "epoch": 113.58114374034002,
60541
  "eval_loss": 2.129167079925537,
60542
+ "eval_runtime": 11.3074,
60543
+ "eval_samples_per_second": 50.852,
60544
+ "eval_steps_per_second": 12.735,
60545
  "num_input_tokens_seen": 50130000,
60546
  "step": 36800
60547
  },
 
60868
  {
60869
  "epoch": 114.19783616692426,
60870
  "eval_loss": 2.127554178237915,
60871
+ "eval_runtime": 11.3023,
60872
+ "eval_samples_per_second": 50.875,
60873
+ "eval_steps_per_second": 12.741,
60874
  "num_input_tokens_seen": 50404128,
60875
  "step": 37000
60876
  },
 
61197
  {
61198
  "epoch": 114.8160741885626,
61199
  "eval_loss": 2.1345906257629395,
61200
+ "eval_runtime": 11.2978,
61201
+ "eval_samples_per_second": 50.895,
61202
+ "eval_steps_per_second": 12.746,
61203
  "num_input_tokens_seen": 50678112,
61204
  "step": 37200
61205
  },
 
61526
  {
61527
  "epoch": 115.43276661514683,
61528
  "eval_loss": 2.132272481918335,
61529
+ "eval_runtime": 11.3176,
61530
+ "eval_samples_per_second": 50.806,
61531
+ "eval_steps_per_second": 12.724,
61532
  "num_input_tokens_seen": 50946800,
61533
  "step": 37400
61534
  },
 
61855
  {
61856
  "epoch": 116.04945904173107,
61857
  "eval_loss": 2.131873369216919,
61858
+ "eval_runtime": 11.3073,
61859
+ "eval_samples_per_second": 50.852,
61860
+ "eval_steps_per_second": 12.735,
61861
  "num_input_tokens_seen": 51219680,
61862
  "step": 37600
61863
  },
 
62184
  {
62185
  "epoch": 116.6676970633694,
62186
  "eval_loss": 2.1323955059051514,
62187
+ "eval_runtime": 11.2948,
62188
+ "eval_samples_per_second": 50.908,
62189
+ "eval_steps_per_second": 12.749,
62190
  "num_input_tokens_seen": 51492544,
62191
  "step": 37800
62192
  },
 
62513
  {
62514
  "epoch": 117.28438948995363,
62515
  "eval_loss": 2.135064125061035,
62516
+ "eval_runtime": 11.3036,
62517
+ "eval_samples_per_second": 50.869,
62518
+ "eval_steps_per_second": 12.739,
62519
  "num_input_tokens_seen": 51764160,
62520
  "step": 38000
62521
  },
 
62842
  {
62843
  "epoch": 117.90262751159196,
62844
  "eval_loss": 2.134946584701538,
62845
+ "eval_runtime": 11.3035,
62846
+ "eval_samples_per_second": 50.869,
62847
+ "eval_steps_per_second": 12.739,
62848
  "num_input_tokens_seen": 52039488,
62849
  "step": 38200
62850
  },
 
63171
  {
63172
  "epoch": 118.5193199381762,
63173
  "eval_loss": 2.1382298469543457,
63174
+ "eval_runtime": 11.2966,
63175
+ "eval_samples_per_second": 50.9,
63176
+ "eval_steps_per_second": 12.747,
63177
  "num_input_tokens_seen": 52311648,
63178
  "step": 38400
63179
  },
 
63500
  {
63501
  "epoch": 119.13601236476043,
63502
  "eval_loss": 2.1389503479003906,
63503
+ "eval_runtime": 11.299,
63504
+ "eval_samples_per_second": 50.89,
63505
+ "eval_steps_per_second": 12.745,
63506
  "num_input_tokens_seen": 52584960,
63507
  "step": 38600
63508
  },
 
63829
  {
63830
  "epoch": 119.75425038639877,
63831
  "eval_loss": 2.141028642654419,
63832
+ "eval_runtime": 11.2899,
63833
+ "eval_samples_per_second": 50.93,
63834
+ "eval_steps_per_second": 12.755,
63835
  "num_input_tokens_seen": 52855712,
63836
  "step": 38800
63837
  },
 
64158
  {
64159
  "epoch": 120.370942812983,
64160
  "eval_loss": 2.142845630645752,
64161
+ "eval_runtime": 12.65,
64162
+ "eval_samples_per_second": 45.454,
64163
+ "eval_steps_per_second": 11.383,
64164
  "num_input_tokens_seen": 53128480,
64165
  "step": 39000
64166
  },
 
64487
  {
64488
  "epoch": 120.98918083462132,
64489
  "eval_loss": 2.142850637435913,
64490
+ "eval_runtime": 11.2942,
64491
+ "eval_samples_per_second": 50.911,
64492
+ "eval_steps_per_second": 12.75,
64493
  "num_input_tokens_seen": 53401056,
64494
  "step": 39200
64495
  },
 
64816
  {
64817
  "epoch": 121.60587326120556,
64818
  "eval_loss": 2.141234874725342,
64819
+ "eval_runtime": 11.3357,
64820
+ "eval_samples_per_second": 50.725,
64821
+ "eval_steps_per_second": 12.703,
64822
  "num_input_tokens_seen": 53673600,
64823
  "step": 39400
64824
  },
 
65145
  {
65146
  "epoch": 122.2225656877898,
65147
  "eval_loss": 2.1376492977142334,
65148
+ "eval_runtime": 11.2938,
65149
+ "eval_samples_per_second": 50.913,
65150
+ "eval_steps_per_second": 12.75,
65151
  "num_input_tokens_seen": 53943712,
65152
  "step": 39600
65153
  },
 
65474
  {
65475
  "epoch": 122.84080370942813,
65476
  "eval_loss": 2.1381330490112305,
65477
+ "eval_runtime": 11.2911,
65478
+ "eval_samples_per_second": 50.925,
65479
+ "eval_steps_per_second": 12.753,
65480
  "num_input_tokens_seen": 54217344,
65481
  "step": 39800
65482
  },
 
65803
  {
65804
  "epoch": 123.45749613601237,
65805
  "eval_loss": 2.137033700942993,
65806
+ "eval_runtime": 11.2958,
65807
+ "eval_samples_per_second": 50.904,
65808
+ "eval_steps_per_second": 12.748,
65809
  "num_input_tokens_seen": 54490336,
65810
  "step": 40000
65811
  },
 
65815
  "step": 40000,
65816
  "total_flos": 2.453675202191819e+18,
65817
  "train_loss": 0.10362623064493919,
65818
+ "train_runtime": 29204.064,
65819
+ "train_samples_per_second": 21.915,
65820
+ "train_steps_per_second": 1.37
65821
  }
65822
  ],
65823
  "logging_steps": 5,