CocoRoF commited on
Commit
3902263
·
verified ·
1 Parent(s): 1e1dcd6

Training in progress, step 22500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0decb17e1576a2e87ecfcfd97d8e2ab8486eb9a2ec6ff00fa3b7efa6f74327ba
3
  size 737632172
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c052d2a67b46c3cbd43e1aaef6787d4e22f25e4730e41749c440d5f7ef1edfa2
3
  size 737632172
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:018d14bd69f4e34f78162e646a75e937b89f1d651e49bb2da5fd566a3dc03363
3
  size 1475354682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08cf78d547b8f41ee223ea3ee959627730e71a470e0b2fc768b9df22602cd24c
3
  size 1475354682
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a711ae47907423581a85380ad2222bf6eaf1af9c9ec45797d4f1a9fb127db2c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71f7b2a24ea005bf8d4cda6609b3f02fff97ffd137300264d740b525a5d16d52
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e8c873ca3f378713a8a07acffb82e5be966b4efb3815b7ddf04ac4a39c37a73
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2be8bf00d7769668daa21530103090701683c486d14d68b216dc7599084911c5
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0fcb54b765d5b0c806961a1b8bdc3214f4fc0489fbe2c720c7312b23d2db5cf
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6ef13f5707990bc90ed5888b6375995ece6a20da4e110402f911e1ccc4380cc
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2a30b2ad9b3632b41b5d2a70ad5aabce34a6f7a76a9e1e270a22f600a05ec22
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6712ce65d3859451db93ee2b906b7b2aadd22b863c4396671311580298e33eef
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ee9cd8fd6ff53fdc84fbb7925a1d22d7707021b0e4b45ae16328680d2405512
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c23b4a4f7a5f1ca851bf1110551e94214f4e9a551744c59b43604b27936d6b6
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b60c5d4b71ffd198beb51d796fd8e27c367782bb1efc7c5f1065d3ed20df402
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f319a4bebd2c89fbe15733682cc2571137e2e88a7af83c46922d3bce0e3020b
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87c6f1afcb23fc820bb3d68d94d047f124b182adf1d874dcd0fa3a260a51bb2b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29f83a80e96cc577cd7fc37cafb41eb24ffb7cdd78cafd492938746bbf31281a
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ebfc4481eb53675078ccf162293df1d6b7500f8ba0b2d00cad430e67f4a70a3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06f91ff7510df73136b368e7b7956418d774a7089cc90de91ae93a237ac8dcaf
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97ffafe779a971f149a59a73318cc7969252e85b03c3f756e6cdd7e796033658
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fed0d2c4bde11cab9fcbb818c02c913c6ca7dd78332351188e7f279c6394b16
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8876018661829237,
5
  "eval_steps": 1000,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -14167,6 +14167,1772 @@
14167
  "eval_samples_per_second": 1803.669,
14168
  "eval_steps_per_second": 56.365,
14169
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14170
  }
14171
  ],
14172
  "logging_steps": 10,
@@ -14186,7 +15952,7 @@
14186
  "attributes": {}
14187
  }
14188
  },
14189
- "total_flos": 6.979446410051584e+18,
14190
  "train_batch_size": 4,
14191
  "trial_name": null,
14192
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9985520994557892,
5
  "eval_steps": 1000,
6
+ "global_step": 22500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
14167
  "eval_samples_per_second": 1803.669,
14168
  "eval_steps_per_second": 56.365,
14169
  "step": 20000
14170
+ },
14171
+ {
14172
+ "epoch": 0.8880456671160151,
14173
+ "grad_norm": 65.7813491821289,
14174
+ "learning_rate": 9.965310740183166e-06,
14175
+ "loss": 10.058,
14176
+ "step": 20010
14177
+ },
14178
+ {
14179
+ "epoch": 0.8884894680491066,
14180
+ "grad_norm": 56.84501647949219,
14181
+ "learning_rate": 9.965293404221238e-06,
14182
+ "loss": 10.5991,
14183
+ "step": 20020
14184
+ },
14185
+ {
14186
+ "epoch": 0.888933268982198,
14187
+ "grad_norm": 55.88136672973633,
14188
+ "learning_rate": 9.96527606825931e-06,
14189
+ "loss": 10.2387,
14190
+ "step": 20030
14191
+ },
14192
+ {
14193
+ "epoch": 0.8893770699152895,
14194
+ "grad_norm": 65.21720886230469,
14195
+ "learning_rate": 9.965258732297384e-06,
14196
+ "loss": 10.6988,
14197
+ "step": 20040
14198
+ },
14199
+ {
14200
+ "epoch": 0.8898208708483809,
14201
+ "grad_norm": 57.60173416137695,
14202
+ "learning_rate": 9.965241396335455e-06,
14203
+ "loss": 10.0901,
14204
+ "step": 20050
14205
+ },
14206
+ {
14207
+ "epoch": 0.8902646717814724,
14208
+ "grad_norm": 66.22525787353516,
14209
+ "learning_rate": 9.965224060373528e-06,
14210
+ "loss": 9.9539,
14211
+ "step": 20060
14212
+ },
14213
+ {
14214
+ "epoch": 0.8907084727145639,
14215
+ "grad_norm": 59.16026306152344,
14216
+ "learning_rate": 9.965206724411601e-06,
14217
+ "loss": 10.2055,
14218
+ "step": 20070
14219
+ },
14220
+ {
14221
+ "epoch": 0.8911522736476554,
14222
+ "grad_norm": 63.042850494384766,
14223
+ "learning_rate": 9.965189388449673e-06,
14224
+ "loss": 10.608,
14225
+ "step": 20080
14226
+ },
14227
+ {
14228
+ "epoch": 0.8915960745807469,
14229
+ "grad_norm": 70.37860870361328,
14230
+ "learning_rate": 9.965172052487746e-06,
14231
+ "loss": 10.2199,
14232
+ "step": 20090
14233
+ },
14234
+ {
14235
+ "epoch": 0.8920398755138382,
14236
+ "grad_norm": 59.544456481933594,
14237
+ "learning_rate": 9.965154716525819e-06,
14238
+ "loss": 10.625,
14239
+ "step": 20100
14240
+ },
14241
+ {
14242
+ "epoch": 0.8924836764469297,
14243
+ "grad_norm": 59.02389907836914,
14244
+ "learning_rate": 9.96513738056389e-06,
14245
+ "loss": 10.6333,
14246
+ "step": 20110
14247
+ },
14248
+ {
14249
+ "epoch": 0.8929274773800212,
14250
+ "grad_norm": 55.048667907714844,
14251
+ "learning_rate": 9.965120044601963e-06,
14252
+ "loss": 9.668,
14253
+ "step": 20120
14254
+ },
14255
+ {
14256
+ "epoch": 0.8933712783131127,
14257
+ "grad_norm": 70.93208312988281,
14258
+ "learning_rate": 9.965102708640036e-06,
14259
+ "loss": 10.0719,
14260
+ "step": 20130
14261
+ },
14262
+ {
14263
+ "epoch": 0.8938150792462041,
14264
+ "grad_norm": 60.164588928222656,
14265
+ "learning_rate": 9.965085372678108e-06,
14266
+ "loss": 10.5491,
14267
+ "step": 20140
14268
+ },
14269
+ {
14270
+ "epoch": 0.8942588801792956,
14271
+ "grad_norm": 63.05065155029297,
14272
+ "learning_rate": 9.965068036716181e-06,
14273
+ "loss": 10.5735,
14274
+ "step": 20150
14275
+ },
14276
+ {
14277
+ "epoch": 0.894702681112387,
14278
+ "grad_norm": 66.36126708984375,
14279
+ "learning_rate": 9.965050700754254e-06,
14280
+ "loss": 10.0222,
14281
+ "step": 20160
14282
+ },
14283
+ {
14284
+ "epoch": 0.8951464820454785,
14285
+ "grad_norm": 65.72769165039062,
14286
+ "learning_rate": 9.965033364792325e-06,
14287
+ "loss": 9.764,
14288
+ "step": 20170
14289
+ },
14290
+ {
14291
+ "epoch": 0.8955902829785699,
14292
+ "grad_norm": 65.65251922607422,
14293
+ "learning_rate": 9.965016028830398e-06,
14294
+ "loss": 10.2024,
14295
+ "step": 20180
14296
+ },
14297
+ {
14298
+ "epoch": 0.8960340839116614,
14299
+ "grad_norm": 75.98188018798828,
14300
+ "learning_rate": 9.964998692868472e-06,
14301
+ "loss": 10.6188,
14302
+ "step": 20190
14303
+ },
14304
+ {
14305
+ "epoch": 0.8964778848447529,
14306
+ "grad_norm": 53.20927429199219,
14307
+ "learning_rate": 9.964981356906545e-06,
14308
+ "loss": 10.6707,
14309
+ "step": 20200
14310
+ },
14311
+ {
14312
+ "epoch": 0.8969216857778444,
14313
+ "grad_norm": 68.85663604736328,
14314
+ "learning_rate": 9.964964020944616e-06,
14315
+ "loss": 10.2216,
14316
+ "step": 20210
14317
+ },
14318
+ {
14319
+ "epoch": 0.8973654867109359,
14320
+ "grad_norm": 60.16876983642578,
14321
+ "learning_rate": 9.964946684982689e-06,
14322
+ "loss": 9.6589,
14323
+ "step": 20220
14324
+ },
14325
+ {
14326
+ "epoch": 0.8978092876440272,
14327
+ "grad_norm": 63.93645477294922,
14328
+ "learning_rate": 9.964929349020762e-06,
14329
+ "loss": 10.0363,
14330
+ "step": 20230
14331
+ },
14332
+ {
14333
+ "epoch": 0.8982530885771187,
14334
+ "grad_norm": 60.31320571899414,
14335
+ "learning_rate": 9.964912013058834e-06,
14336
+ "loss": 10.2793,
14337
+ "step": 20240
14338
+ },
14339
+ {
14340
+ "epoch": 0.8986968895102102,
14341
+ "grad_norm": 57.123626708984375,
14342
+ "learning_rate": 9.964894677096907e-06,
14343
+ "loss": 10.699,
14344
+ "step": 20250
14345
+ },
14346
+ {
14347
+ "epoch": 0.8991406904433017,
14348
+ "grad_norm": 67.34060668945312,
14349
+ "learning_rate": 9.96487734113498e-06,
14350
+ "loss": 10.715,
14351
+ "step": 20260
14352
+ },
14353
+ {
14354
+ "epoch": 0.8995844913763931,
14355
+ "grad_norm": 60.88409423828125,
14356
+ "learning_rate": 9.964860005173051e-06,
14357
+ "loss": 10.485,
14358
+ "step": 20270
14359
+ },
14360
+ {
14361
+ "epoch": 0.9000282923094846,
14362
+ "grad_norm": 59.35341262817383,
14363
+ "learning_rate": 9.964842669211124e-06,
14364
+ "loss": 9.8806,
14365
+ "step": 20280
14366
+ },
14367
+ {
14368
+ "epoch": 0.900472093242576,
14369
+ "grad_norm": 70.66928100585938,
14370
+ "learning_rate": 9.964825333249197e-06,
14371
+ "loss": 10.1476,
14372
+ "step": 20290
14373
+ },
14374
+ {
14375
+ "epoch": 0.9009158941756675,
14376
+ "grad_norm": 57.342830657958984,
14377
+ "learning_rate": 9.964807997287269e-06,
14378
+ "loss": 10.0399,
14379
+ "step": 20300
14380
+ },
14381
+ {
14382
+ "epoch": 0.901359695108759,
14383
+ "grad_norm": 72.41960906982422,
14384
+ "learning_rate": 9.964790661325342e-06,
14385
+ "loss": 10.3914,
14386
+ "step": 20310
14387
+ },
14388
+ {
14389
+ "epoch": 0.9018034960418504,
14390
+ "grad_norm": 66.77056121826172,
14391
+ "learning_rate": 9.964773325363415e-06,
14392
+ "loss": 10.1905,
14393
+ "step": 20320
14394
+ },
14395
+ {
14396
+ "epoch": 0.9022472969749419,
14397
+ "grad_norm": 72.86076354980469,
14398
+ "learning_rate": 9.964755989401486e-06,
14399
+ "loss": 10.1389,
14400
+ "step": 20330
14401
+ },
14402
+ {
14403
+ "epoch": 0.9026910979080334,
14404
+ "grad_norm": 64.16802215576172,
14405
+ "learning_rate": 9.96473865343956e-06,
14406
+ "loss": 10.4992,
14407
+ "step": 20340
14408
+ },
14409
+ {
14410
+ "epoch": 0.9031348988411249,
14411
+ "grad_norm": 71.86752319335938,
14412
+ "learning_rate": 9.964721317477632e-06,
14413
+ "loss": 9.9231,
14414
+ "step": 20350
14415
+ },
14416
+ {
14417
+ "epoch": 0.9035786997742162,
14418
+ "grad_norm": 69.46532440185547,
14419
+ "learning_rate": 9.964703981515704e-06,
14420
+ "loss": 10.2235,
14421
+ "step": 20360
14422
+ },
14423
+ {
14424
+ "epoch": 0.9040225007073077,
14425
+ "grad_norm": 58.126220703125,
14426
+ "learning_rate": 9.964686645553777e-06,
14427
+ "loss": 9.7999,
14428
+ "step": 20370
14429
+ },
14430
+ {
14431
+ "epoch": 0.9044663016403992,
14432
+ "grad_norm": 60.532958984375,
14433
+ "learning_rate": 9.96466930959185e-06,
14434
+ "loss": 10.689,
14435
+ "step": 20380
14436
+ },
14437
+ {
14438
+ "epoch": 0.9049101025734907,
14439
+ "grad_norm": 68.76100158691406,
14440
+ "learning_rate": 9.964651973629921e-06,
14441
+ "loss": 10.3952,
14442
+ "step": 20390
14443
+ },
14444
+ {
14445
+ "epoch": 0.9053539035065821,
14446
+ "grad_norm": 68.4054183959961,
14447
+ "learning_rate": 9.964634637667994e-06,
14448
+ "loss": 10.5912,
14449
+ "step": 20400
14450
+ },
14451
+ {
14452
+ "epoch": 0.9057977044396736,
14453
+ "grad_norm": 58.026851654052734,
14454
+ "learning_rate": 9.964617301706067e-06,
14455
+ "loss": 10.5631,
14456
+ "step": 20410
14457
+ },
14458
+ {
14459
+ "epoch": 0.906241505372765,
14460
+ "grad_norm": 68.02127838134766,
14461
+ "learning_rate": 9.96459996574414e-06,
14462
+ "loss": 10.4884,
14463
+ "step": 20420
14464
+ },
14465
+ {
14466
+ "epoch": 0.9066853063058565,
14467
+ "grad_norm": 71.5272216796875,
14468
+ "learning_rate": 9.964582629782212e-06,
14469
+ "loss": 10.2185,
14470
+ "step": 20430
14471
+ },
14472
+ {
14473
+ "epoch": 0.907129107238948,
14474
+ "grad_norm": 69.71690368652344,
14475
+ "learning_rate": 9.964565293820285e-06,
14476
+ "loss": 9.6736,
14477
+ "step": 20440
14478
+ },
14479
+ {
14480
+ "epoch": 0.9075729081720394,
14481
+ "grad_norm": 71.79097747802734,
14482
+ "learning_rate": 9.964547957858358e-06,
14483
+ "loss": 10.3408,
14484
+ "step": 20450
14485
+ },
14486
+ {
14487
+ "epoch": 0.9080167091051309,
14488
+ "grad_norm": 62.07600402832031,
14489
+ "learning_rate": 9.96453062189643e-06,
14490
+ "loss": 10.1348,
14491
+ "step": 20460
14492
+ },
14493
+ {
14494
+ "epoch": 0.9084605100382224,
14495
+ "grad_norm": 70.9972152709961,
14496
+ "learning_rate": 9.964513285934503e-06,
14497
+ "loss": 10.3505,
14498
+ "step": 20470
14499
+ },
14500
+ {
14501
+ "epoch": 0.9089043109713139,
14502
+ "grad_norm": 62.994075775146484,
14503
+ "learning_rate": 9.964495949972576e-06,
14504
+ "loss": 10.8796,
14505
+ "step": 20480
14506
+ },
14507
+ {
14508
+ "epoch": 0.9093481119044052,
14509
+ "grad_norm": 57.46833801269531,
14510
+ "learning_rate": 9.964478614010647e-06,
14511
+ "loss": 10.4861,
14512
+ "step": 20490
14513
+ },
14514
+ {
14515
+ "epoch": 0.9097919128374967,
14516
+ "grad_norm": 71.40296173095703,
14517
+ "learning_rate": 9.96446127804872e-06,
14518
+ "loss": 9.9325,
14519
+ "step": 20500
14520
+ },
14521
+ {
14522
+ "epoch": 0.9102357137705882,
14523
+ "grad_norm": 71.27222442626953,
14524
+ "learning_rate": 9.964443942086793e-06,
14525
+ "loss": 10.204,
14526
+ "step": 20510
14527
+ },
14528
+ {
14529
+ "epoch": 0.9106795147036797,
14530
+ "grad_norm": 76.38119506835938,
14531
+ "learning_rate": 9.964426606124865e-06,
14532
+ "loss": 9.9359,
14533
+ "step": 20520
14534
+ },
14535
+ {
14536
+ "epoch": 0.9111233156367712,
14537
+ "grad_norm": 60.81074905395508,
14538
+ "learning_rate": 9.964409270162938e-06,
14539
+ "loss": 10.2593,
14540
+ "step": 20530
14541
+ },
14542
+ {
14543
+ "epoch": 0.9115671165698626,
14544
+ "grad_norm": 71.3153305053711,
14545
+ "learning_rate": 9.96439193420101e-06,
14546
+ "loss": 10.3253,
14547
+ "step": 20540
14548
+ },
14549
+ {
14550
+ "epoch": 0.912010917502954,
14551
+ "grad_norm": 68.32502746582031,
14552
+ "learning_rate": 9.964374598239082e-06,
14553
+ "loss": 10.073,
14554
+ "step": 20550
14555
+ },
14556
+ {
14557
+ "epoch": 0.9124547184360455,
14558
+ "grad_norm": 60.2060546875,
14559
+ "learning_rate": 9.964357262277155e-06,
14560
+ "loss": 10.3595,
14561
+ "step": 20560
14562
+ },
14563
+ {
14564
+ "epoch": 0.912898519369137,
14565
+ "grad_norm": 69.63631439208984,
14566
+ "learning_rate": 9.964339926315228e-06,
14567
+ "loss": 10.1938,
14568
+ "step": 20570
14569
+ },
14570
+ {
14571
+ "epoch": 0.9133423203022284,
14572
+ "grad_norm": 57.60273742675781,
14573
+ "learning_rate": 9.9643225903533e-06,
14574
+ "loss": 10.612,
14575
+ "step": 20580
14576
+ },
14577
+ {
14578
+ "epoch": 0.9137861212353199,
14579
+ "grad_norm": 58.673763275146484,
14580
+ "learning_rate": 9.964305254391373e-06,
14581
+ "loss": 9.8792,
14582
+ "step": 20590
14583
+ },
14584
+ {
14585
+ "epoch": 0.9142299221684114,
14586
+ "grad_norm": 65.37464141845703,
14587
+ "learning_rate": 9.964287918429446e-06,
14588
+ "loss": 10.0737,
14589
+ "step": 20600
14590
+ },
14591
+ {
14592
+ "epoch": 0.9146737231015029,
14593
+ "grad_norm": 63.91664123535156,
14594
+ "learning_rate": 9.964270582467517e-06,
14595
+ "loss": 9.939,
14596
+ "step": 20610
14597
+ },
14598
+ {
14599
+ "epoch": 0.9151175240345942,
14600
+ "grad_norm": 69.06259155273438,
14601
+ "learning_rate": 9.96425324650559e-06,
14602
+ "loss": 10.422,
14603
+ "step": 20620
14604
+ },
14605
+ {
14606
+ "epoch": 0.9155613249676857,
14607
+ "grad_norm": 68.58475494384766,
14608
+ "learning_rate": 9.964235910543663e-06,
14609
+ "loss": 9.9517,
14610
+ "step": 20630
14611
+ },
14612
+ {
14613
+ "epoch": 0.9160051259007772,
14614
+ "grad_norm": 61.14803695678711,
14615
+ "learning_rate": 9.964218574581736e-06,
14616
+ "loss": 10.2273,
14617
+ "step": 20640
14618
+ },
14619
+ {
14620
+ "epoch": 0.9164489268338687,
14621
+ "grad_norm": 61.80668258666992,
14622
+ "learning_rate": 9.964201238619808e-06,
14623
+ "loss": 10.4235,
14624
+ "step": 20650
14625
+ },
14626
+ {
14627
+ "epoch": 0.9168927277669602,
14628
+ "grad_norm": 68.1749038696289,
14629
+ "learning_rate": 9.96418390265788e-06,
14630
+ "loss": 10.5679,
14631
+ "step": 20660
14632
+ },
14633
+ {
14634
+ "epoch": 0.9173365287000516,
14635
+ "grad_norm": 58.40181350708008,
14636
+ "learning_rate": 9.964166566695954e-06,
14637
+ "loss": 10.4709,
14638
+ "step": 20670
14639
+ },
14640
+ {
14641
+ "epoch": 0.917780329633143,
14642
+ "grad_norm": 59.180511474609375,
14643
+ "learning_rate": 9.964149230734025e-06,
14644
+ "loss": 9.9807,
14645
+ "step": 20680
14646
+ },
14647
+ {
14648
+ "epoch": 0.9182241305662345,
14649
+ "grad_norm": 73.31056213378906,
14650
+ "learning_rate": 9.964131894772098e-06,
14651
+ "loss": 10.3648,
14652
+ "step": 20690
14653
+ },
14654
+ {
14655
+ "epoch": 0.918667931499326,
14656
+ "grad_norm": 69.01880645751953,
14657
+ "learning_rate": 9.964114558810171e-06,
14658
+ "loss": 9.9768,
14659
+ "step": 20700
14660
+ },
14661
+ {
14662
+ "epoch": 0.9191117324324174,
14663
+ "grad_norm": 68.61119079589844,
14664
+ "learning_rate": 9.964097222848243e-06,
14665
+ "loss": 10.1686,
14666
+ "step": 20710
14667
+ },
14668
+ {
14669
+ "epoch": 0.9195555333655089,
14670
+ "grad_norm": 62.651973724365234,
14671
+ "learning_rate": 9.964079886886316e-06,
14672
+ "loss": 10.1275,
14673
+ "step": 20720
14674
+ },
14675
+ {
14676
+ "epoch": 0.9199993342986004,
14677
+ "grad_norm": 62.72435760498047,
14678
+ "learning_rate": 9.964062550924389e-06,
14679
+ "loss": 10.2096,
14680
+ "step": 20730
14681
+ },
14682
+ {
14683
+ "epoch": 0.9204431352316919,
14684
+ "grad_norm": 57.11748504638672,
14685
+ "learning_rate": 9.96404521496246e-06,
14686
+ "loss": 10.5196,
14687
+ "step": 20740
14688
+ },
14689
+ {
14690
+ "epoch": 0.9208869361647832,
14691
+ "grad_norm": 64.23450469970703,
14692
+ "learning_rate": 9.964027879000533e-06,
14693
+ "loss": 10.4117,
14694
+ "step": 20750
14695
+ },
14696
+ {
14697
+ "epoch": 0.9213307370978747,
14698
+ "grad_norm": 69.50020599365234,
14699
+ "learning_rate": 9.964010543038607e-06,
14700
+ "loss": 10.4837,
14701
+ "step": 20760
14702
+ },
14703
+ {
14704
+ "epoch": 0.9217745380309662,
14705
+ "grad_norm": 55.89120101928711,
14706
+ "learning_rate": 9.963993207076678e-06,
14707
+ "loss": 9.9386,
14708
+ "step": 20770
14709
+ },
14710
+ {
14711
+ "epoch": 0.9222183389640577,
14712
+ "grad_norm": 59.105995178222656,
14713
+ "learning_rate": 9.963975871114751e-06,
14714
+ "loss": 10.2322,
14715
+ "step": 20780
14716
+ },
14717
+ {
14718
+ "epoch": 0.9226621398971492,
14719
+ "grad_norm": 55.461021423339844,
14720
+ "learning_rate": 9.963958535152824e-06,
14721
+ "loss": 10.1765,
14722
+ "step": 20790
14723
+ },
14724
+ {
14725
+ "epoch": 0.9231059408302406,
14726
+ "grad_norm": 76.23094940185547,
14727
+ "learning_rate": 9.963941199190897e-06,
14728
+ "loss": 10.2576,
14729
+ "step": 20800
14730
+ },
14731
+ {
14732
+ "epoch": 0.923549741763332,
14733
+ "grad_norm": 72.75599670410156,
14734
+ "learning_rate": 9.963923863228969e-06,
14735
+ "loss": 10.7945,
14736
+ "step": 20810
14737
+ },
14738
+ {
14739
+ "epoch": 0.9239935426964235,
14740
+ "grad_norm": 61.83809280395508,
14741
+ "learning_rate": 9.963906527267042e-06,
14742
+ "loss": 10.4771,
14743
+ "step": 20820
14744
+ },
14745
+ {
14746
+ "epoch": 0.924437343629515,
14747
+ "grad_norm": 58.66376876831055,
14748
+ "learning_rate": 9.963889191305115e-06,
14749
+ "loss": 10.0925,
14750
+ "step": 20830
14751
+ },
14752
+ {
14753
+ "epoch": 0.9248811445626064,
14754
+ "grad_norm": 70.21393585205078,
14755
+ "learning_rate": 9.963871855343186e-06,
14756
+ "loss": 10.2228,
14757
+ "step": 20840
14758
+ },
14759
+ {
14760
+ "epoch": 0.9253249454956979,
14761
+ "grad_norm": 55.493282318115234,
14762
+ "learning_rate": 9.96385451938126e-06,
14763
+ "loss": 10.1143,
14764
+ "step": 20850
14765
+ },
14766
+ {
14767
+ "epoch": 0.9257687464287894,
14768
+ "grad_norm": 58.75727081298828,
14769
+ "learning_rate": 9.963837183419332e-06,
14770
+ "loss": 10.2045,
14771
+ "step": 20860
14772
+ },
14773
+ {
14774
+ "epoch": 0.9262125473618809,
14775
+ "grad_norm": 60.97832107543945,
14776
+ "learning_rate": 9.963819847457404e-06,
14777
+ "loss": 10.176,
14778
+ "step": 20870
14779
+ },
14780
+ {
14781
+ "epoch": 0.9266563482949723,
14782
+ "grad_norm": 66.70062255859375,
14783
+ "learning_rate": 9.963802511495477e-06,
14784
+ "loss": 10.8102,
14785
+ "step": 20880
14786
+ },
14787
+ {
14788
+ "epoch": 0.9271001492280637,
14789
+ "grad_norm": 59.54788589477539,
14790
+ "learning_rate": 9.96378517553355e-06,
14791
+ "loss": 9.8927,
14792
+ "step": 20890
14793
+ },
14794
+ {
14795
+ "epoch": 0.9275439501611552,
14796
+ "grad_norm": 56.45012283325195,
14797
+ "learning_rate": 9.963767839571621e-06,
14798
+ "loss": 10.2811,
14799
+ "step": 20900
14800
+ },
14801
+ {
14802
+ "epoch": 0.9279877510942467,
14803
+ "grad_norm": 63.22991943359375,
14804
+ "learning_rate": 9.963750503609694e-06,
14805
+ "loss": 10.0369,
14806
+ "step": 20910
14807
+ },
14808
+ {
14809
+ "epoch": 0.9284315520273382,
14810
+ "grad_norm": 66.25374603271484,
14811
+ "learning_rate": 9.963733167647767e-06,
14812
+ "loss": 10.2809,
14813
+ "step": 20920
14814
+ },
14815
+ {
14816
+ "epoch": 0.9288753529604296,
14817
+ "grad_norm": 65.86581420898438,
14818
+ "learning_rate": 9.96371583168584e-06,
14819
+ "loss": 10.2681,
14820
+ "step": 20930
14821
+ },
14822
+ {
14823
+ "epoch": 0.929319153893521,
14824
+ "grad_norm": 68.9689712524414,
14825
+ "learning_rate": 9.963698495723912e-06,
14826
+ "loss": 10.6552,
14827
+ "step": 20940
14828
+ },
14829
+ {
14830
+ "epoch": 0.9297629548266125,
14831
+ "grad_norm": 71.05926513671875,
14832
+ "learning_rate": 9.963681159761985e-06,
14833
+ "loss": 9.7715,
14834
+ "step": 20950
14835
+ },
14836
+ {
14837
+ "epoch": 0.930206755759704,
14838
+ "grad_norm": 76.48685455322266,
14839
+ "learning_rate": 9.963663823800058e-06,
14840
+ "loss": 10.4682,
14841
+ "step": 20960
14842
+ },
14843
+ {
14844
+ "epoch": 0.9306505566927954,
14845
+ "grad_norm": 63.782257080078125,
14846
+ "learning_rate": 9.96364648783813e-06,
14847
+ "loss": 10.3633,
14848
+ "step": 20970
14849
+ },
14850
+ {
14851
+ "epoch": 0.9310943576258869,
14852
+ "grad_norm": 71.73765563964844,
14853
+ "learning_rate": 9.963629151876202e-06,
14854
+ "loss": 10.465,
14855
+ "step": 20980
14856
+ },
14857
+ {
14858
+ "epoch": 0.9315381585589784,
14859
+ "grad_norm": 60.337677001953125,
14860
+ "learning_rate": 9.963611815914275e-06,
14861
+ "loss": 9.9345,
14862
+ "step": 20990
14863
+ },
14864
+ {
14865
+ "epoch": 0.9319819594920699,
14866
+ "grad_norm": 58.7932243347168,
14867
+ "learning_rate": 9.963594479952347e-06,
14868
+ "loss": 9.7854,
14869
+ "step": 21000
14870
+ },
14871
+ {
14872
+ "epoch": 0.9319819594920699,
14873
+ "eval_loss": 0.32051748037338257,
14874
+ "eval_runtime": 673.8843,
14875
+ "eval_samples_per_second": 1802.076,
14876
+ "eval_steps_per_second": 56.315,
14877
+ "step": 21000
14878
+ },
14879
+ {
14880
+ "epoch": 0.9324257604251613,
14881
+ "grad_norm": 61.75080871582031,
14882
+ "learning_rate": 9.96357714399042e-06,
14883
+ "loss": 10.7108,
14884
+ "step": 21010
14885
+ },
14886
+ {
14887
+ "epoch": 0.9328695613582527,
14888
+ "grad_norm": 66.45942687988281,
14889
+ "learning_rate": 9.963559808028493e-06,
14890
+ "loss": 9.7115,
14891
+ "step": 21020
14892
+ },
14893
+ {
14894
+ "epoch": 0.9333133622913442,
14895
+ "grad_norm": 66.86365509033203,
14896
+ "learning_rate": 9.963542472066564e-06,
14897
+ "loss": 10.4903,
14898
+ "step": 21030
14899
+ },
14900
+ {
14901
+ "epoch": 0.9337571632244357,
14902
+ "grad_norm": 59.50422668457031,
14903
+ "learning_rate": 9.963525136104637e-06,
14904
+ "loss": 10.2654,
14905
+ "step": 21040
14906
+ },
14907
+ {
14908
+ "epoch": 0.9342009641575272,
14909
+ "grad_norm": 65.55677032470703,
14910
+ "learning_rate": 9.96350780014271e-06,
14911
+ "loss": 10.2086,
14912
+ "step": 21050
14913
+ },
14914
+ {
14915
+ "epoch": 0.9346447650906186,
14916
+ "grad_norm": 60.787967681884766,
14917
+ "learning_rate": 9.963490464180784e-06,
14918
+ "loss": 10.4747,
14919
+ "step": 21060
14920
+ },
14921
+ {
14922
+ "epoch": 0.93508856602371,
14923
+ "grad_norm": 58.43632507324219,
14924
+ "learning_rate": 9.963473128218855e-06,
14925
+ "loss": 10.1393,
14926
+ "step": 21070
14927
+ },
14928
+ {
14929
+ "epoch": 0.9355323669568015,
14930
+ "grad_norm": 80.03214263916016,
14931
+ "learning_rate": 9.963455792256928e-06,
14932
+ "loss": 10.1442,
14933
+ "step": 21080
14934
+ },
14935
+ {
14936
+ "epoch": 0.935976167889893,
14937
+ "grad_norm": 58.377357482910156,
14938
+ "learning_rate": 9.963438456295001e-06,
14939
+ "loss": 10.3524,
14940
+ "step": 21090
14941
+ },
14942
+ {
14943
+ "epoch": 0.9364199688229845,
14944
+ "grad_norm": 62.32807159423828,
14945
+ "learning_rate": 9.963421120333073e-06,
14946
+ "loss": 10.1192,
14947
+ "step": 21100
14948
+ },
14949
+ {
14950
+ "epoch": 0.9368637697560759,
14951
+ "grad_norm": 62.65857696533203,
14952
+ "learning_rate": 9.963403784371146e-06,
14953
+ "loss": 10.2389,
14954
+ "step": 21110
14955
+ },
14956
+ {
14957
+ "epoch": 0.9373075706891674,
14958
+ "grad_norm": 57.880252838134766,
14959
+ "learning_rate": 9.963386448409219e-06,
14960
+ "loss": 10.244,
14961
+ "step": 21120
14962
+ },
14963
+ {
14964
+ "epoch": 0.9377513716222589,
14965
+ "grad_norm": 65.94375610351562,
14966
+ "learning_rate": 9.96336911244729e-06,
14967
+ "loss": 10.4006,
14968
+ "step": 21130
14969
+ },
14970
+ {
14971
+ "epoch": 0.9381951725553503,
14972
+ "grad_norm": 57.031944274902344,
14973
+ "learning_rate": 9.963351776485363e-06,
14974
+ "loss": 10.1134,
14975
+ "step": 21140
14976
+ },
14977
+ {
14978
+ "epoch": 0.9386389734884417,
14979
+ "grad_norm": 54.85254669189453,
14980
+ "learning_rate": 9.963334440523436e-06,
14981
+ "loss": 10.1874,
14982
+ "step": 21150
14983
+ },
14984
+ {
14985
+ "epoch": 0.9390827744215332,
14986
+ "grad_norm": 61.38536834716797,
14987
+ "learning_rate": 9.963317104561508e-06,
14988
+ "loss": 10.2983,
14989
+ "step": 21160
14990
+ },
14991
+ {
14992
+ "epoch": 0.9395265753546247,
14993
+ "grad_norm": 63.140010833740234,
14994
+ "learning_rate": 9.96329976859958e-06,
14995
+ "loss": 10.518,
14996
+ "step": 21170
14997
+ },
14998
+ {
14999
+ "epoch": 0.9399703762877162,
15000
+ "grad_norm": 64.6523208618164,
15001
+ "learning_rate": 9.963282432637654e-06,
15002
+ "loss": 9.8054,
15003
+ "step": 21180
15004
+ },
15005
+ {
15006
+ "epoch": 0.9404141772208076,
15007
+ "grad_norm": 64.04774475097656,
15008
+ "learning_rate": 9.963265096675727e-06,
15009
+ "loss": 10.1823,
15010
+ "step": 21190
15011
+ },
15012
+ {
15013
+ "epoch": 0.940857978153899,
15014
+ "grad_norm": 61.016780853271484,
15015
+ "learning_rate": 9.963247760713798e-06,
15016
+ "loss": 10.0684,
15017
+ "step": 21200
15018
+ },
15019
+ {
15020
+ "epoch": 0.9413017790869905,
15021
+ "grad_norm": 55.790435791015625,
15022
+ "learning_rate": 9.963230424751871e-06,
15023
+ "loss": 9.9463,
15024
+ "step": 21210
15025
+ },
15026
+ {
15027
+ "epoch": 0.941745580020082,
15028
+ "grad_norm": 58.64189910888672,
15029
+ "learning_rate": 9.963213088789944e-06,
15030
+ "loss": 9.7533,
15031
+ "step": 21220
15032
+ },
15033
+ {
15034
+ "epoch": 0.9421893809531735,
15035
+ "grad_norm": 62.45125961303711,
15036
+ "learning_rate": 9.963195752828016e-06,
15037
+ "loss": 10.2871,
15038
+ "step": 21230
15039
+ },
15040
+ {
15041
+ "epoch": 0.9426331818862649,
15042
+ "grad_norm": 57.653106689453125,
15043
+ "learning_rate": 9.963178416866089e-06,
15044
+ "loss": 9.6557,
15045
+ "step": 21240
15046
+ },
15047
+ {
15048
+ "epoch": 0.9430769828193564,
15049
+ "grad_norm": 70.4138412475586,
15050
+ "learning_rate": 9.963161080904162e-06,
15051
+ "loss": 9.947,
15052
+ "step": 21250
15053
+ },
15054
+ {
15055
+ "epoch": 0.9435207837524479,
15056
+ "grad_norm": 62.09977340698242,
15057
+ "learning_rate": 9.963143744942233e-06,
15058
+ "loss": 10.3042,
15059
+ "step": 21260
15060
+ },
15061
+ {
15062
+ "epoch": 0.9439645846855393,
15063
+ "grad_norm": 57.68659973144531,
15064
+ "learning_rate": 9.963126408980306e-06,
15065
+ "loss": 10.1486,
15066
+ "step": 21270
15067
+ },
15068
+ {
15069
+ "epoch": 0.9444083856186307,
15070
+ "grad_norm": 58.4940185546875,
15071
+ "learning_rate": 9.96310907301838e-06,
15072
+ "loss": 10.6721,
15073
+ "step": 21280
15074
+ },
15075
+ {
15076
+ "epoch": 0.9448521865517222,
15077
+ "grad_norm": 62.28818893432617,
15078
+ "learning_rate": 9.963091737056451e-06,
15079
+ "loss": 10.3957,
15080
+ "step": 21290
15081
+ },
15082
+ {
15083
+ "epoch": 0.9452959874848137,
15084
+ "grad_norm": 52.62106704711914,
15085
+ "learning_rate": 9.963074401094524e-06,
15086
+ "loss": 9.9156,
15087
+ "step": 21300
15088
+ },
15089
+ {
15090
+ "epoch": 0.9457397884179052,
15091
+ "grad_norm": 55.59827423095703,
15092
+ "learning_rate": 9.963057065132597e-06,
15093
+ "loss": 10.5472,
15094
+ "step": 21310
15095
+ },
15096
+ {
15097
+ "epoch": 0.9461835893509966,
15098
+ "grad_norm": 66.30583953857422,
15099
+ "learning_rate": 9.96303972917067e-06,
15100
+ "loss": 10.2904,
15101
+ "step": 21320
15102
+ },
15103
+ {
15104
+ "epoch": 0.946627390284088,
15105
+ "grad_norm": 61.947025299072266,
15106
+ "learning_rate": 9.963022393208741e-06,
15107
+ "loss": 10.238,
15108
+ "step": 21330
15109
+ },
15110
+ {
15111
+ "epoch": 0.9470711912171795,
15112
+ "grad_norm": 64.70133209228516,
15113
+ "learning_rate": 9.963005057246815e-06,
15114
+ "loss": 10.2199,
15115
+ "step": 21340
15116
+ },
15117
+ {
15118
+ "epoch": 0.947514992150271,
15119
+ "grad_norm": 60.59946823120117,
15120
+ "learning_rate": 9.962987721284888e-06,
15121
+ "loss": 10.3811,
15122
+ "step": 21350
15123
+ },
15124
+ {
15125
+ "epoch": 0.9479587930833625,
15126
+ "grad_norm": 57.58180618286133,
15127
+ "learning_rate": 9.962970385322959e-06,
15128
+ "loss": 10.4364,
15129
+ "step": 21360
15130
+ },
15131
+ {
15132
+ "epoch": 0.9484025940164539,
15133
+ "grad_norm": 63.964500427246094,
15134
+ "learning_rate": 9.962953049361032e-06,
15135
+ "loss": 10.1727,
15136
+ "step": 21370
15137
+ },
15138
+ {
15139
+ "epoch": 0.9488463949495454,
15140
+ "grad_norm": 66.83090209960938,
15141
+ "learning_rate": 9.962935713399105e-06,
15142
+ "loss": 10.4908,
15143
+ "step": 21380
15144
+ },
15145
+ {
15146
+ "epoch": 0.9492901958826369,
15147
+ "grad_norm": 58.45000457763672,
15148
+ "learning_rate": 9.962918377437177e-06,
15149
+ "loss": 10.3677,
15150
+ "step": 21390
15151
+ },
15152
+ {
15153
+ "epoch": 0.9497339968157283,
15154
+ "grad_norm": 62.023040771484375,
15155
+ "learning_rate": 9.96290104147525e-06,
15156
+ "loss": 10.0164,
15157
+ "step": 21400
15158
+ },
15159
+ {
15160
+ "epoch": 0.9501777977488197,
15161
+ "grad_norm": 58.94234848022461,
15162
+ "learning_rate": 9.962883705513323e-06,
15163
+ "loss": 10.4101,
15164
+ "step": 21410
15165
+ },
15166
+ {
15167
+ "epoch": 0.9506215986819112,
15168
+ "grad_norm": 58.807456970214844,
15169
+ "learning_rate": 9.962866369551396e-06,
15170
+ "loss": 9.882,
15171
+ "step": 21420
15172
+ },
15173
+ {
15174
+ "epoch": 0.9510653996150027,
15175
+ "grad_norm": 54.390098571777344,
15176
+ "learning_rate": 9.962849033589467e-06,
15177
+ "loss": 10.6928,
15178
+ "step": 21430
15179
+ },
15180
+ {
15181
+ "epoch": 0.9515092005480942,
15182
+ "grad_norm": 65.57588195800781,
15183
+ "learning_rate": 9.96283169762754e-06,
15184
+ "loss": 10.8391,
15185
+ "step": 21440
15186
+ },
15187
+ {
15188
+ "epoch": 0.9519530014811857,
15189
+ "grad_norm": 59.871700286865234,
15190
+ "learning_rate": 9.962814361665613e-06,
15191
+ "loss": 10.1765,
15192
+ "step": 21450
15193
+ },
15194
+ {
15195
+ "epoch": 0.952396802414277,
15196
+ "grad_norm": 61.5579948425293,
15197
+ "learning_rate": 9.962797025703685e-06,
15198
+ "loss": 10.1676,
15199
+ "step": 21460
15200
+ },
15201
+ {
15202
+ "epoch": 0.9528406033473685,
15203
+ "grad_norm": 50.21920394897461,
15204
+ "learning_rate": 9.962779689741758e-06,
15205
+ "loss": 10.3828,
15206
+ "step": 21470
15207
+ },
15208
+ {
15209
+ "epoch": 0.95328440428046,
15210
+ "grad_norm": 59.22177505493164,
15211
+ "learning_rate": 9.962762353779831e-06,
15212
+ "loss": 10.3205,
15213
+ "step": 21480
15214
+ },
15215
+ {
15216
+ "epoch": 0.9537282052135515,
15217
+ "grad_norm": 66.43260955810547,
15218
+ "learning_rate": 9.962745017817902e-06,
15219
+ "loss": 9.9529,
15220
+ "step": 21490
15221
+ },
15222
+ {
15223
+ "epoch": 0.9541720061466429,
15224
+ "grad_norm": 52.945499420166016,
15225
+ "learning_rate": 9.962727681855975e-06,
15226
+ "loss": 10.0895,
15227
+ "step": 21500
15228
+ },
15229
+ {
15230
+ "epoch": 0.9546158070797344,
15231
+ "grad_norm": 65.87628173828125,
15232
+ "learning_rate": 9.962710345894048e-06,
15233
+ "loss": 9.9705,
15234
+ "step": 21510
15235
+ },
15236
+ {
15237
+ "epoch": 0.9550596080128259,
15238
+ "grad_norm": 60.30337142944336,
15239
+ "learning_rate": 9.96269300993212e-06,
15240
+ "loss": 10.2607,
15241
+ "step": 21520
15242
+ },
15243
+ {
15244
+ "epoch": 0.9555034089459173,
15245
+ "grad_norm": 69.21615600585938,
15246
+ "learning_rate": 9.962675673970193e-06,
15247
+ "loss": 9.8337,
15248
+ "step": 21530
15249
+ },
15250
+ {
15251
+ "epoch": 0.9559472098790087,
15252
+ "grad_norm": 60.485984802246094,
15253
+ "learning_rate": 9.962658338008266e-06,
15254
+ "loss": 10.2629,
15255
+ "step": 21540
15256
+ },
15257
+ {
15258
+ "epoch": 0.9563910108121002,
15259
+ "grad_norm": 57.54688262939453,
15260
+ "learning_rate": 9.962641002046339e-06,
15261
+ "loss": 10.5513,
15262
+ "step": 21550
15263
+ },
15264
+ {
15265
+ "epoch": 0.9568348117451917,
15266
+ "grad_norm": 56.43334197998047,
15267
+ "learning_rate": 9.96262366608441e-06,
15268
+ "loss": 10.5496,
15269
+ "step": 21560
15270
+ },
15271
+ {
15272
+ "epoch": 0.9572786126782832,
15273
+ "grad_norm": 69.3460464477539,
15274
+ "learning_rate": 9.962606330122484e-06,
15275
+ "loss": 10.0796,
15276
+ "step": 21570
15277
+ },
15278
+ {
15279
+ "epoch": 0.9577224136113747,
15280
+ "grad_norm": 59.36885070800781,
15281
+ "learning_rate": 9.962588994160557e-06,
15282
+ "loss": 9.7917,
15283
+ "step": 21580
15284
+ },
15285
+ {
15286
+ "epoch": 0.958166214544466,
15287
+ "grad_norm": 50.145694732666016,
15288
+ "learning_rate": 9.962571658198628e-06,
15289
+ "loss": 10.1281,
15290
+ "step": 21590
15291
+ },
15292
+ {
15293
+ "epoch": 0.9586100154775575,
15294
+ "grad_norm": 63.263710021972656,
15295
+ "learning_rate": 9.962554322236701e-06,
15296
+ "loss": 10.6324,
15297
+ "step": 21600
15298
+ },
15299
+ {
15300
+ "epoch": 0.959053816410649,
15301
+ "grad_norm": 64.55142211914062,
15302
+ "learning_rate": 9.962536986274774e-06,
15303
+ "loss": 10.3349,
15304
+ "step": 21610
15305
+ },
15306
+ {
15307
+ "epoch": 0.9594976173437405,
15308
+ "grad_norm": 69.35453796386719,
15309
+ "learning_rate": 9.962519650312846e-06,
15310
+ "loss": 10.1361,
15311
+ "step": 21620
15312
+ },
15313
+ {
15314
+ "epoch": 0.9599414182768319,
15315
+ "grad_norm": 54.69525909423828,
15316
+ "learning_rate": 9.962502314350919e-06,
15317
+ "loss": 10.561,
15318
+ "step": 21630
15319
+ },
15320
+ {
15321
+ "epoch": 0.9603852192099234,
15322
+ "grad_norm": 58.769649505615234,
15323
+ "learning_rate": 9.962484978388992e-06,
15324
+ "loss": 9.6632,
15325
+ "step": 21640
15326
+ },
15327
+ {
15328
+ "epoch": 0.9608290201430149,
15329
+ "grad_norm": 62.73846435546875,
15330
+ "learning_rate": 9.962467642427063e-06,
15331
+ "loss": 10.144,
15332
+ "step": 21650
15333
+ },
15334
+ {
15335
+ "epoch": 0.9612728210761063,
15336
+ "grad_norm": 56.944557189941406,
15337
+ "learning_rate": 9.962450306465136e-06,
15338
+ "loss": 10.048,
15339
+ "step": 21660
15340
+ },
15341
+ {
15342
+ "epoch": 0.9617166220091977,
15343
+ "grad_norm": 51.97823715209961,
15344
+ "learning_rate": 9.96243297050321e-06,
15345
+ "loss": 9.9427,
15346
+ "step": 21670
15347
+ },
15348
+ {
15349
+ "epoch": 0.9621604229422892,
15350
+ "grad_norm": 60.300987243652344,
15351
+ "learning_rate": 9.96241563454128e-06,
15352
+ "loss": 10.6487,
15353
+ "step": 21680
15354
+ },
15355
+ {
15356
+ "epoch": 0.9626042238753807,
15357
+ "grad_norm": 61.2890739440918,
15358
+ "learning_rate": 9.962398298579354e-06,
15359
+ "loss": 10.0074,
15360
+ "step": 21690
15361
+ },
15362
+ {
15363
+ "epoch": 0.9630480248084722,
15364
+ "grad_norm": 53.57798767089844,
15365
+ "learning_rate": 9.962380962617427e-06,
15366
+ "loss": 10.3964,
15367
+ "step": 21700
15368
+ },
15369
+ {
15370
+ "epoch": 0.9634918257415637,
15371
+ "grad_norm": 63.061988830566406,
15372
+ "learning_rate": 9.9623636266555e-06,
15373
+ "loss": 10.4485,
15374
+ "step": 21710
15375
+ },
15376
+ {
15377
+ "epoch": 0.963935626674655,
15378
+ "grad_norm": 60.63272476196289,
15379
+ "learning_rate": 9.962346290693571e-06,
15380
+ "loss": 10.3628,
15381
+ "step": 21720
15382
+ },
15383
+ {
15384
+ "epoch": 0.9643794276077465,
15385
+ "grad_norm": 69.05794525146484,
15386
+ "learning_rate": 9.962328954731644e-06,
15387
+ "loss": 10.0831,
15388
+ "step": 21730
15389
+ },
15390
+ {
15391
+ "epoch": 0.964823228540838,
15392
+ "grad_norm": 63.956844329833984,
15393
+ "learning_rate": 9.962311618769717e-06,
15394
+ "loss": 10.7723,
15395
+ "step": 21740
15396
+ },
15397
+ {
15398
+ "epoch": 0.9652670294739295,
15399
+ "grad_norm": 58.707271575927734,
15400
+ "learning_rate": 9.962294282807789e-06,
15401
+ "loss": 9.9793,
15402
+ "step": 21750
15403
+ },
15404
+ {
15405
+ "epoch": 0.9657108304070209,
15406
+ "grad_norm": 60.982521057128906,
15407
+ "learning_rate": 9.962276946845862e-06,
15408
+ "loss": 10.037,
15409
+ "step": 21760
15410
+ },
15411
+ {
15412
+ "epoch": 0.9661546313401124,
15413
+ "grad_norm": 58.97859191894531,
15414
+ "learning_rate": 9.962259610883935e-06,
15415
+ "loss": 10.2802,
15416
+ "step": 21770
15417
+ },
15418
+ {
15419
+ "epoch": 0.9665984322732039,
15420
+ "grad_norm": 59.34490966796875,
15421
+ "learning_rate": 9.962242274922006e-06,
15422
+ "loss": 10.1092,
15423
+ "step": 21780
15424
+ },
15425
+ {
15426
+ "epoch": 0.9670422332062953,
15427
+ "grad_norm": 72.03850555419922,
15428
+ "learning_rate": 9.96222493896008e-06,
15429
+ "loss": 10.4551,
15430
+ "step": 21790
15431
+ },
15432
+ {
15433
+ "epoch": 0.9674860341393868,
15434
+ "grad_norm": 64.43966674804688,
15435
+ "learning_rate": 9.962207602998152e-06,
15436
+ "loss": 10.2547,
15437
+ "step": 21800
15438
+ },
15439
+ {
15440
+ "epoch": 0.9679298350724782,
15441
+ "grad_norm": 58.295806884765625,
15442
+ "learning_rate": 9.962190267036224e-06,
15443
+ "loss": 10.3897,
15444
+ "step": 21810
15445
+ },
15446
+ {
15447
+ "epoch": 0.9683736360055697,
15448
+ "grad_norm": 64.32368469238281,
15449
+ "learning_rate": 9.962172931074297e-06,
15450
+ "loss": 10.4014,
15451
+ "step": 21820
15452
+ },
15453
+ {
15454
+ "epoch": 0.9688174369386612,
15455
+ "grad_norm": 61.49608612060547,
15456
+ "learning_rate": 9.96215559511237e-06,
15457
+ "loss": 10.3173,
15458
+ "step": 21830
15459
+ },
15460
+ {
15461
+ "epoch": 0.9692612378717527,
15462
+ "grad_norm": 66.80955505371094,
15463
+ "learning_rate": 9.962138259150441e-06,
15464
+ "loss": 10.3526,
15465
+ "step": 21840
15466
+ },
15467
+ {
15468
+ "epoch": 0.969705038804844,
15469
+ "grad_norm": 60.56246566772461,
15470
+ "learning_rate": 9.962120923188514e-06,
15471
+ "loss": 9.8569,
15472
+ "step": 21850
15473
+ },
15474
+ {
15475
+ "epoch": 0.9701488397379355,
15476
+ "grad_norm": 57.73928451538086,
15477
+ "learning_rate": 9.962103587226588e-06,
15478
+ "loss": 10.3068,
15479
+ "step": 21860
15480
+ },
15481
+ {
15482
+ "epoch": 0.970592640671027,
15483
+ "grad_norm": 54.70594024658203,
15484
+ "learning_rate": 9.962086251264659e-06,
15485
+ "loss": 10.054,
15486
+ "step": 21870
15487
+ },
15488
+ {
15489
+ "epoch": 0.9710364416041185,
15490
+ "grad_norm": 54.87747573852539,
15491
+ "learning_rate": 9.962068915302732e-06,
15492
+ "loss": 9.9986,
15493
+ "step": 21880
15494
+ },
15495
+ {
15496
+ "epoch": 0.9714802425372099,
15497
+ "grad_norm": 60.417457580566406,
15498
+ "learning_rate": 9.962051579340805e-06,
15499
+ "loss": 10.3474,
15500
+ "step": 21890
15501
+ },
15502
+ {
15503
+ "epoch": 0.9719240434703014,
15504
+ "grad_norm": 68.63028717041016,
15505
+ "learning_rate": 9.962034243378876e-06,
15506
+ "loss": 10.0733,
15507
+ "step": 21900
15508
+ },
15509
+ {
15510
+ "epoch": 0.9723678444033929,
15511
+ "grad_norm": 63.004581451416016,
15512
+ "learning_rate": 9.96201690741695e-06,
15513
+ "loss": 10.58,
15514
+ "step": 21910
15515
+ },
15516
+ {
15517
+ "epoch": 0.9728116453364843,
15518
+ "grad_norm": 72.34359741210938,
15519
+ "learning_rate": 9.961999571455023e-06,
15520
+ "loss": 10.2606,
15521
+ "step": 21920
15522
+ },
15523
+ {
15524
+ "epoch": 0.9732554462695758,
15525
+ "grad_norm": 66.6717758178711,
15526
+ "learning_rate": 9.961982235493096e-06,
15527
+ "loss": 10.5124,
15528
+ "step": 21930
15529
+ },
15530
+ {
15531
+ "epoch": 0.9736992472026672,
15532
+ "grad_norm": 68.4973373413086,
15533
+ "learning_rate": 9.961964899531167e-06,
15534
+ "loss": 10.3562,
15535
+ "step": 21940
15536
+ },
15537
+ {
15538
+ "epoch": 0.9741430481357587,
15539
+ "grad_norm": 67.8966064453125,
15540
+ "learning_rate": 9.96194756356924e-06,
15541
+ "loss": 9.8984,
15542
+ "step": 21950
15543
+ },
15544
+ {
15545
+ "epoch": 0.9745868490688502,
15546
+ "grad_norm": 64.77039337158203,
15547
+ "learning_rate": 9.961930227607313e-06,
15548
+ "loss": 10.4107,
15549
+ "step": 21960
15550
+ },
15551
+ {
15552
+ "epoch": 0.9750306500019417,
15553
+ "grad_norm": 56.87838363647461,
15554
+ "learning_rate": 9.961912891645385e-06,
15555
+ "loss": 10.3442,
15556
+ "step": 21970
15557
+ },
15558
+ {
15559
+ "epoch": 0.975474450935033,
15560
+ "grad_norm": 63.49540710449219,
15561
+ "learning_rate": 9.961895555683458e-06,
15562
+ "loss": 9.7954,
15563
+ "step": 21980
15564
+ },
15565
+ {
15566
+ "epoch": 0.9759182518681245,
15567
+ "grad_norm": 66.3160171508789,
15568
+ "learning_rate": 9.96187821972153e-06,
15569
+ "loss": 9.961,
15570
+ "step": 21990
15571
+ },
15572
+ {
15573
+ "epoch": 0.976362052801216,
15574
+ "grad_norm": 59.505393981933594,
15575
+ "learning_rate": 9.961860883759602e-06,
15576
+ "loss": 10.3546,
15577
+ "step": 22000
15578
+ },
15579
+ {
15580
+ "epoch": 0.976362052801216,
15581
+ "eval_loss": 0.3203989863395691,
15582
+ "eval_runtime": 674.395,
15583
+ "eval_samples_per_second": 1800.712,
15584
+ "eval_steps_per_second": 56.273,
15585
+ "step": 22000
15586
+ },
15587
+ {
15588
+ "epoch": 0.9768058537343075,
15589
+ "grad_norm": 65.98320770263672,
15590
+ "learning_rate": 9.961843547797675e-06,
15591
+ "loss": 10.2495,
15592
+ "step": 22010
15593
+ },
15594
+ {
15595
+ "epoch": 0.977249654667399,
15596
+ "grad_norm": 60.26272964477539,
15597
+ "learning_rate": 9.961826211835748e-06,
15598
+ "loss": 10.185,
15599
+ "step": 22020
15600
+ },
15601
+ {
15602
+ "epoch": 0.9776934556004904,
15603
+ "grad_norm": 77.41650390625,
15604
+ "learning_rate": 9.96180887587382e-06,
15605
+ "loss": 10.2332,
15606
+ "step": 22030
15607
+ },
15608
+ {
15609
+ "epoch": 0.9781372565335819,
15610
+ "grad_norm": 67.0610580444336,
15611
+ "learning_rate": 9.961791539911893e-06,
15612
+ "loss": 9.9651,
15613
+ "step": 22040
15614
+ },
15615
+ {
15616
+ "epoch": 0.9785810574666733,
15617
+ "grad_norm": 62.729793548583984,
15618
+ "learning_rate": 9.961774203949966e-06,
15619
+ "loss": 10.2186,
15620
+ "step": 22050
15621
+ },
15622
+ {
15623
+ "epoch": 0.9790248583997648,
15624
+ "grad_norm": 59.35409927368164,
15625
+ "learning_rate": 9.961756867988037e-06,
15626
+ "loss": 9.6738,
15627
+ "step": 22060
15628
+ },
15629
+ {
15630
+ "epoch": 0.9794686593328562,
15631
+ "grad_norm": 63.04311752319336,
15632
+ "learning_rate": 9.96173953202611e-06,
15633
+ "loss": 10.0731,
15634
+ "step": 22070
15635
+ },
15636
+ {
15637
+ "epoch": 0.9799124602659477,
15638
+ "grad_norm": 69.83229064941406,
15639
+ "learning_rate": 9.961722196064183e-06,
15640
+ "loss": 10.3592,
15641
+ "step": 22080
15642
+ },
15643
+ {
15644
+ "epoch": 0.9803562611990392,
15645
+ "grad_norm": 71.35539245605469,
15646
+ "learning_rate": 9.961704860102255e-06,
15647
+ "loss": 10.2029,
15648
+ "step": 22090
15649
+ },
15650
+ {
15651
+ "epoch": 0.9808000621321307,
15652
+ "grad_norm": 57.54240798950195,
15653
+ "learning_rate": 9.961687524140328e-06,
15654
+ "loss": 10.6619,
15655
+ "step": 22100
15656
+ },
15657
+ {
15658
+ "epoch": 0.981243863065222,
15659
+ "grad_norm": 62.44277572631836,
15660
+ "learning_rate": 9.961670188178401e-06,
15661
+ "loss": 10.35,
15662
+ "step": 22110
15663
+ },
15664
+ {
15665
+ "epoch": 0.9816876639983135,
15666
+ "grad_norm": 61.99805450439453,
15667
+ "learning_rate": 9.961652852216472e-06,
15668
+ "loss": 10.4932,
15669
+ "step": 22120
15670
+ },
15671
+ {
15672
+ "epoch": 0.982131464931405,
15673
+ "grad_norm": 63.21669387817383,
15674
+ "learning_rate": 9.961635516254545e-06,
15675
+ "loss": 10.4017,
15676
+ "step": 22130
15677
+ },
15678
+ {
15679
+ "epoch": 0.9825752658644965,
15680
+ "grad_norm": 59.47304916381836,
15681
+ "learning_rate": 9.961618180292618e-06,
15682
+ "loss": 10.439,
15683
+ "step": 22140
15684
+ },
15685
+ {
15686
+ "epoch": 0.983019066797588,
15687
+ "grad_norm": 63.6852912902832,
15688
+ "learning_rate": 9.961600844330692e-06,
15689
+ "loss": 10.2629,
15690
+ "step": 22150
15691
+ },
15692
+ {
15693
+ "epoch": 0.9834628677306794,
15694
+ "grad_norm": 58.97916793823242,
15695
+ "learning_rate": 9.961583508368763e-06,
15696
+ "loss": 10.1938,
15697
+ "step": 22160
15698
+ },
15699
+ {
15700
+ "epoch": 0.9839066686637709,
15701
+ "grad_norm": 64.60242462158203,
15702
+ "learning_rate": 9.961566172406836e-06,
15703
+ "loss": 10.5744,
15704
+ "step": 22170
15705
+ },
15706
+ {
15707
+ "epoch": 0.9843504695968623,
15708
+ "grad_norm": 61.11840057373047,
15709
+ "learning_rate": 9.961548836444909e-06,
15710
+ "loss": 9.7976,
15711
+ "step": 22180
15712
+ },
15713
+ {
15714
+ "epoch": 0.9847942705299538,
15715
+ "grad_norm": 64.354248046875,
15716
+ "learning_rate": 9.96153150048298e-06,
15717
+ "loss": 10.3659,
15718
+ "step": 22190
15719
+ },
15720
+ {
15721
+ "epoch": 0.9852380714630452,
15722
+ "grad_norm": 64.9032974243164,
15723
+ "learning_rate": 9.961514164521054e-06,
15724
+ "loss": 10.3499,
15725
+ "step": 22200
15726
+ },
15727
+ {
15728
+ "epoch": 0.9856818723961367,
15729
+ "grad_norm": 58.52092742919922,
15730
+ "learning_rate": 9.961496828559127e-06,
15731
+ "loss": 10.0338,
15732
+ "step": 22210
15733
+ },
15734
+ {
15735
+ "epoch": 0.9861256733292282,
15736
+ "grad_norm": 63.69118881225586,
15737
+ "learning_rate": 9.961479492597198e-06,
15738
+ "loss": 10.3962,
15739
+ "step": 22220
15740
+ },
15741
+ {
15742
+ "epoch": 0.9865694742623197,
15743
+ "grad_norm": 69.64185333251953,
15744
+ "learning_rate": 9.961462156635271e-06,
15745
+ "loss": 9.8446,
15746
+ "step": 22230
15747
+ },
15748
+ {
15749
+ "epoch": 0.987013275195411,
15750
+ "grad_norm": 64.73435974121094,
15751
+ "learning_rate": 9.961444820673344e-06,
15752
+ "loss": 9.8991,
15753
+ "step": 22240
15754
+ },
15755
+ {
15756
+ "epoch": 0.9874570761285025,
15757
+ "grad_norm": 55.16053771972656,
15758
+ "learning_rate": 9.961427484711416e-06,
15759
+ "loss": 10.1616,
15760
+ "step": 22250
15761
+ },
15762
+ {
15763
+ "epoch": 0.987900877061594,
15764
+ "grad_norm": 63.042667388916016,
15765
+ "learning_rate": 9.961410148749489e-06,
15766
+ "loss": 10.7821,
15767
+ "step": 22260
15768
+ },
15769
+ {
15770
+ "epoch": 0.9883446779946855,
15771
+ "grad_norm": 67.79585266113281,
15772
+ "learning_rate": 9.961392812787562e-06,
15773
+ "loss": 10.337,
15774
+ "step": 22270
15775
+ },
15776
+ {
15777
+ "epoch": 0.988788478927777,
15778
+ "grad_norm": 56.07440185546875,
15779
+ "learning_rate": 9.961375476825633e-06,
15780
+ "loss": 9.5085,
15781
+ "step": 22280
15782
+ },
15783
+ {
15784
+ "epoch": 0.9892322798608684,
15785
+ "grad_norm": 70.412841796875,
15786
+ "learning_rate": 9.961358140863706e-06,
15787
+ "loss": 10.1263,
15788
+ "step": 22290
15789
+ },
15790
+ {
15791
+ "epoch": 0.9896760807939599,
15792
+ "grad_norm": 52.69704818725586,
15793
+ "learning_rate": 9.96134080490178e-06,
15794
+ "loss": 10.1837,
15795
+ "step": 22300
15796
+ },
15797
+ {
15798
+ "epoch": 0.9901198817270513,
15799
+ "grad_norm": 61.2188606262207,
15800
+ "learning_rate": 9.96132346893985e-06,
15801
+ "loss": 10.7708,
15802
+ "step": 22310
15803
+ },
15804
+ {
15805
+ "epoch": 0.9905636826601428,
15806
+ "grad_norm": 54.49897766113281,
15807
+ "learning_rate": 9.961306132977924e-06,
15808
+ "loss": 10.7213,
15809
+ "step": 22320
15810
+ },
15811
+ {
15812
+ "epoch": 0.9910074835932342,
15813
+ "grad_norm": 62.42861557006836,
15814
+ "learning_rate": 9.961288797015997e-06,
15815
+ "loss": 10.1813,
15816
+ "step": 22330
15817
+ },
15818
+ {
15819
+ "epoch": 0.9914512845263257,
15820
+ "grad_norm": 60.572418212890625,
15821
+ "learning_rate": 9.961271461054068e-06,
15822
+ "loss": 9.7815,
15823
+ "step": 22340
15824
+ },
15825
+ {
15826
+ "epoch": 0.9918950854594172,
15827
+ "grad_norm": 66.12911224365234,
15828
+ "learning_rate": 9.961254125092141e-06,
15829
+ "loss": 10.526,
15830
+ "step": 22350
15831
+ },
15832
+ {
15833
+ "epoch": 0.9923388863925087,
15834
+ "grad_norm": 68.29117584228516,
15835
+ "learning_rate": 9.961236789130214e-06,
15836
+ "loss": 10.4441,
15837
+ "step": 22360
15838
+ },
15839
+ {
15840
+ "epoch": 0.9927826873256002,
15841
+ "grad_norm": 62.79308319091797,
15842
+ "learning_rate": 9.961219453168287e-06,
15843
+ "loss": 9.7355,
15844
+ "step": 22370
15845
+ },
15846
+ {
15847
+ "epoch": 0.9932264882586915,
15848
+ "grad_norm": 69.58606719970703,
15849
+ "learning_rate": 9.961202117206359e-06,
15850
+ "loss": 10.3635,
15851
+ "step": 22380
15852
+ },
15853
+ {
15854
+ "epoch": 0.993670289191783,
15855
+ "grad_norm": 56.06214904785156,
15856
+ "learning_rate": 9.961184781244432e-06,
15857
+ "loss": 10.2798,
15858
+ "step": 22390
15859
+ },
15860
+ {
15861
+ "epoch": 0.9941140901248745,
15862
+ "grad_norm": 68.97488403320312,
15863
+ "learning_rate": 9.961167445282505e-06,
15864
+ "loss": 10.7732,
15865
+ "step": 22400
15866
+ },
15867
+ {
15868
+ "epoch": 0.994557891057966,
15869
+ "grad_norm": 56.97425079345703,
15870
+ "learning_rate": 9.961150109320576e-06,
15871
+ "loss": 10.1022,
15872
+ "step": 22410
15873
+ },
15874
+ {
15875
+ "epoch": 0.9950016919910574,
15876
+ "grad_norm": 56.741455078125,
15877
+ "learning_rate": 9.96113277335865e-06,
15878
+ "loss": 10.2925,
15879
+ "step": 22420
15880
+ },
15881
+ {
15882
+ "epoch": 0.9954454929241489,
15883
+ "grad_norm": 69.6789321899414,
15884
+ "learning_rate": 9.961115437396722e-06,
15885
+ "loss": 9.8957,
15886
+ "step": 22430
15887
+ },
15888
+ {
15889
+ "epoch": 0.9958892938572403,
15890
+ "grad_norm": 58.19102478027344,
15891
+ "learning_rate": 9.961098101434794e-06,
15892
+ "loss": 9.5713,
15893
+ "step": 22440
15894
+ },
15895
+ {
15896
+ "epoch": 0.9963330947903318,
15897
+ "grad_norm": 56.53800964355469,
15898
+ "learning_rate": 9.961080765472867e-06,
15899
+ "loss": 9.8579,
15900
+ "step": 22450
15901
+ },
15902
+ {
15903
+ "epoch": 0.9967768957234232,
15904
+ "grad_norm": 58.388755798339844,
15905
+ "learning_rate": 9.96106342951094e-06,
15906
+ "loss": 10.0617,
15907
+ "step": 22460
15908
+ },
15909
+ {
15910
+ "epoch": 0.9972206966565147,
15911
+ "grad_norm": 57.43764877319336,
15912
+ "learning_rate": 9.961046093549011e-06,
15913
+ "loss": 10.0687,
15914
+ "step": 22470
15915
+ },
15916
+ {
15917
+ "epoch": 0.9976644975896062,
15918
+ "grad_norm": 64.95555877685547,
15919
+ "learning_rate": 9.961028757587084e-06,
15920
+ "loss": 10.092,
15921
+ "step": 22480
15922
+ },
15923
+ {
15924
+ "epoch": 0.9981082985226977,
15925
+ "grad_norm": 57.88254928588867,
15926
+ "learning_rate": 9.961011421625158e-06,
15927
+ "loss": 10.4509,
15928
+ "step": 22490
15929
+ },
15930
+ {
15931
+ "epoch": 0.9985520994557892,
15932
+ "grad_norm": 63.7093391418457,
15933
+ "learning_rate": 9.960994085663229e-06,
15934
+ "loss": 10.5474,
15935
+ "step": 22500
15936
  }
15937
  ],
15938
  "logging_steps": 10,
 
15952
  "attributes": {}
15953
  }
15954
  },
15955
+ "total_flos": 7.851877211308032e+18,
15956
  "train_batch_size": 4,
15957
  "trial_name": null,
15958
  "trial_params": null