ErrorAI commited on
Commit
7f57f7a
·
verified ·
1 Parent(s): 5781649

Training in progress, step 773, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a00f3914d60206d0be69ae2a2aa20db7a52b95681dc5bfad637f554bf4eb614e
3
  size 80792096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9b64dc8416b708564ab9d59be261d9ecadfa40201427399c2e7f5c5a4c8fc14
3
  size 80792096
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39bfeeb2632bd0dddc1f05355ab8f40f9182d6d512c9643c4181ba4727864440
3
  size 41460084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bd2e8eb393e003fbe4e09c5774f39ecf0ce15f5be5a1d82cb0adc4ea59c9c6b
3
  size 41460084
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44cb312d73619029a00e6f33e844a8427d87126b5d0e0e99120380098ca8eaa1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21dbde94f8edaf47214de2b254032cbe2979740ac38bbb264a3164210c85320d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f5be4274947d77cbe3ca9add57d2ae0325d823ebb78bc6d716c83662b6f9cd5
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45719461b132f33086549160c158249554ace1a4b2303f17dcf1d7d8dd8db700
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7529107373868047,
5
  "eval_steps": 500,
6
- "global_step": 582,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4081,6 +4081,1343 @@
4081
  "learning_rate": 1.4500332050779386e-05,
4082
  "loss": 1.7656,
4083
  "step": 582
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4084
  }
4085
  ],
4086
  "logging_steps": 1,
@@ -4095,12 +5432,12 @@
4095
  "should_evaluate": false,
4096
  "should_log": false,
4097
  "should_save": true,
4098
- "should_training_stop": false
4099
  },
4100
  "attributes": {}
4101
  }
4102
  },
4103
- "total_flos": 4.056856715298079e+17,
4104
  "train_batch_size": 4,
4105
  "trial_name": null,
4106
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 773,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4081
  "learning_rate": 1.4500332050779386e-05,
4082
  "loss": 1.7656,
4083
  "step": 582
4084
+ },
4085
+ {
4086
+ "epoch": 0.7542043984476067,
4087
+ "grad_norm": 0.6377444863319397,
4088
+ "learning_rate": 1.4356597180781018e-05,
4089
+ "loss": 1.8165,
4090
+ "step": 583
4091
+ },
4092
+ {
4093
+ "epoch": 0.7554980595084088,
4094
+ "grad_norm": 0.6441891193389893,
4095
+ "learning_rate": 1.4213458735809072e-05,
4096
+ "loss": 1.7498,
4097
+ "step": 584
4098
+ },
4099
+ {
4100
+ "epoch": 0.7567917205692108,
4101
+ "grad_norm": 0.6479103565216064,
4102
+ "learning_rate": 1.4070919111015096e-05,
4103
+ "loss": 2.0357,
4104
+ "step": 585
4105
+ },
4106
+ {
4107
+ "epoch": 0.7580853816300129,
4108
+ "grad_norm": 0.6354538798332214,
4109
+ "learning_rate": 1.3928980691530514e-05,
4110
+ "loss": 1.5854,
4111
+ "step": 586
4112
+ },
4113
+ {
4114
+ "epoch": 0.759379042690815,
4115
+ "grad_norm": 0.697376549243927,
4116
+ "learning_rate": 1.3787645852426661e-05,
4117
+ "loss": 1.9868,
4118
+ "step": 587
4119
+ },
4120
+ {
4121
+ "epoch": 0.7606727037516171,
4122
+ "grad_norm": 0.6565506458282471,
4123
+ "learning_rate": 1.364691695867515e-05,
4124
+ "loss": 1.8827,
4125
+ "step": 588
4126
+ },
4127
+ {
4128
+ "epoch": 0.7619663648124192,
4129
+ "grad_norm": 0.6202589869499207,
4130
+ "learning_rate": 1.3506796365108232e-05,
4131
+ "loss": 1.8662,
4132
+ "step": 589
4133
+ },
4134
+ {
4135
+ "epoch": 0.7632600258732212,
4136
+ "grad_norm": 0.7095828056335449,
4137
+ "learning_rate": 1.3367286416379365e-05,
4138
+ "loss": 1.9568,
4139
+ "step": 590
4140
+ },
4141
+ {
4142
+ "epoch": 0.7645536869340233,
4143
+ "grad_norm": 0.7279965877532959,
4144
+ "learning_rate": 1.3228389446924072e-05,
4145
+ "loss": 1.922,
4146
+ "step": 591
4147
+ },
4148
+ {
4149
+ "epoch": 0.7658473479948253,
4150
+ "grad_norm": 0.6959128379821777,
4151
+ "learning_rate": 1.3090107780920807e-05,
4152
+ "loss": 1.9386,
4153
+ "step": 592
4154
+ },
4155
+ {
4156
+ "epoch": 0.7671410090556274,
4157
+ "grad_norm": 0.6932267546653748,
4158
+ "learning_rate": 1.2952443732252057e-05,
4159
+ "loss": 1.8064,
4160
+ "step": 593
4161
+ },
4162
+ {
4163
+ "epoch": 0.7684346701164295,
4164
+ "grad_norm": 0.7042782306671143,
4165
+ "learning_rate": 1.2815399604465672e-05,
4166
+ "loss": 1.9048,
4167
+ "step": 594
4168
+ },
4169
+ {
4170
+ "epoch": 0.7697283311772316,
4171
+ "grad_norm": 0.7183865308761597,
4172
+ "learning_rate": 1.267897769073631e-05,
4173
+ "loss": 1.8095,
4174
+ "step": 595
4175
+ },
4176
+ {
4177
+ "epoch": 0.7710219922380336,
4178
+ "grad_norm": 0.7274515628814697,
4179
+ "learning_rate": 1.2543180273827043e-05,
4180
+ "loss": 1.8595,
4181
+ "step": 596
4182
+ },
4183
+ {
4184
+ "epoch": 0.7723156532988357,
4185
+ "grad_norm": 0.8378881216049194,
4186
+ "learning_rate": 1.2408009626051137e-05,
4187
+ "loss": 1.9941,
4188
+ "step": 597
4189
+ },
4190
+ {
4191
+ "epoch": 0.7736093143596378,
4192
+ "grad_norm": 0.8286713361740112,
4193
+ "learning_rate": 1.2273468009234095e-05,
4194
+ "loss": 1.9748,
4195
+ "step": 598
4196
+ },
4197
+ {
4198
+ "epoch": 0.7749029754204398,
4199
+ "grad_norm": 0.8339402079582214,
4200
+ "learning_rate": 1.2139557674675772e-05,
4201
+ "loss": 1.8496,
4202
+ "step": 599
4203
+ },
4204
+ {
4205
+ "epoch": 0.7761966364812419,
4206
+ "grad_norm": 1.088751196861267,
4207
+ "learning_rate": 1.200628086311269e-05,
4208
+ "loss": 2.094,
4209
+ "step": 600
4210
+ },
4211
+ {
4212
+ "epoch": 0.777490297542044,
4213
+ "grad_norm": 0.4214520752429962,
4214
+ "learning_rate": 1.1873639804680598e-05,
4215
+ "loss": 1.4714,
4216
+ "step": 601
4217
+ },
4218
+ {
4219
+ "epoch": 0.7787839586028461,
4220
+ "grad_norm": 0.450844943523407,
4221
+ "learning_rate": 1.1741636718877053e-05,
4222
+ "loss": 1.518,
4223
+ "step": 602
4224
+ },
4225
+ {
4226
+ "epoch": 0.7800776196636481,
4227
+ "grad_norm": 0.458598256111145,
4228
+ "learning_rate": 1.1610273814524398e-05,
4229
+ "loss": 1.5158,
4230
+ "step": 603
4231
+ },
4232
+ {
4233
+ "epoch": 0.7813712807244502,
4234
+ "grad_norm": 0.47176575660705566,
4235
+ "learning_rate": 1.1479553289732742e-05,
4236
+ "loss": 1.5977,
4237
+ "step": 604
4238
+ },
4239
+ {
4240
+ "epoch": 0.7826649417852523,
4241
+ "grad_norm": 0.4848724603652954,
4242
+ "learning_rate": 1.134947733186315e-05,
4243
+ "loss": 1.5488,
4244
+ "step": 605
4245
+ },
4246
+ {
4247
+ "epoch": 0.7839586028460543,
4248
+ "grad_norm": 0.4914746880531311,
4249
+ "learning_rate": 1.1220048117491123e-05,
4250
+ "loss": 1.579,
4251
+ "step": 606
4252
+ },
4253
+ {
4254
+ "epoch": 0.7852522639068564,
4255
+ "grad_norm": 0.5144307613372803,
4256
+ "learning_rate": 1.1091267812370116e-05,
4257
+ "loss": 1.5866,
4258
+ "step": 607
4259
+ },
4260
+ {
4261
+ "epoch": 0.7865459249676585,
4262
+ "grad_norm": 0.4992072284221649,
4263
+ "learning_rate": 1.0963138571395276e-05,
4264
+ "loss": 1.5532,
4265
+ "step": 608
4266
+ },
4267
+ {
4268
+ "epoch": 0.7878395860284605,
4269
+ "grad_norm": 0.5066733360290527,
4270
+ "learning_rate": 1.0835662538567481e-05,
4271
+ "loss": 1.6586,
4272
+ "step": 609
4273
+ },
4274
+ {
4275
+ "epoch": 0.7891332470892626,
4276
+ "grad_norm": 0.5066677927970886,
4277
+ "learning_rate": 1.0708841846957374e-05,
4278
+ "loss": 1.7165,
4279
+ "step": 610
4280
+ },
4281
+ {
4282
+ "epoch": 0.7904269081500647,
4283
+ "grad_norm": 0.5076642632484436,
4284
+ "learning_rate": 1.058267861866969e-05,
4285
+ "loss": 1.5526,
4286
+ "step": 611
4287
+ },
4288
+ {
4289
+ "epoch": 0.7917205692108668,
4290
+ "grad_norm": 0.5482640862464905,
4291
+ "learning_rate": 1.0457174964807775e-05,
4292
+ "loss": 1.6631,
4293
+ "step": 612
4294
+ },
4295
+ {
4296
+ "epoch": 0.7930142302716688,
4297
+ "grad_norm": 0.534303605556488,
4298
+ "learning_rate": 1.0332332985438248e-05,
4299
+ "loss": 1.6207,
4300
+ "step": 613
4301
+ },
4302
+ {
4303
+ "epoch": 0.7943078913324709,
4304
+ "grad_norm": 0.5623872876167297,
4305
+ "learning_rate": 1.0208154769555827e-05,
4306
+ "loss": 1.6125,
4307
+ "step": 614
4308
+ },
4309
+ {
4310
+ "epoch": 0.795601552393273,
4311
+ "grad_norm": 0.526637852191925,
4312
+ "learning_rate": 1.0084642395048426e-05,
4313
+ "loss": 1.6224,
4314
+ "step": 615
4315
+ },
4316
+ {
4317
+ "epoch": 0.796895213454075,
4318
+ "grad_norm": 0.5447207093238831,
4319
+ "learning_rate": 9.96179792866237e-06,
4320
+ "loss": 1.7249,
4321
+ "step": 616
4322
+ },
4323
+ {
4324
+ "epoch": 0.7981888745148771,
4325
+ "grad_norm": 0.5318351984024048,
4326
+ "learning_rate": 9.83962342596776e-06,
4327
+ "loss": 1.6172,
4328
+ "step": 617
4329
+ },
4330
+ {
4331
+ "epoch": 0.7994825355756792,
4332
+ "grad_norm": 0.5474776029586792,
4333
+ "learning_rate": 9.71812093132416e-06,
4334
+ "loss": 1.619,
4335
+ "step": 618
4336
+ },
4337
+ {
4338
+ "epoch": 0.8007761966364813,
4339
+ "grad_norm": 0.5580310821533203,
4340
+ "learning_rate": 9.597292477846353e-06,
4341
+ "loss": 1.7075,
4342
+ "step": 619
4343
+ },
4344
+ {
4345
+ "epoch": 0.8020698576972833,
4346
+ "grad_norm": 0.5502408146858215,
4347
+ "learning_rate": 9.477140087370268e-06,
4348
+ "loss": 1.6628,
4349
+ "step": 620
4350
+ },
4351
+ {
4352
+ "epoch": 0.8033635187580854,
4353
+ "grad_norm": 0.5557919144630432,
4354
+ "learning_rate": 9.357665770419244e-06,
4355
+ "loss": 1.8303,
4356
+ "step": 621
4357
+ },
4358
+ {
4359
+ "epoch": 0.8046571798188874,
4360
+ "grad_norm": 0.5612048506736755,
4361
+ "learning_rate": 9.238871526170334e-06,
4362
+ "loss": 1.672,
4363
+ "step": 622
4364
+ },
4365
+ {
4366
+ "epoch": 0.8059508408796895,
4367
+ "grad_norm": 0.5757585167884827,
4368
+ "learning_rate": 9.12075934242082e-06,
4369
+ "loss": 1.6857,
4370
+ "step": 623
4371
+ },
4372
+ {
4373
+ "epoch": 0.8072445019404916,
4374
+ "grad_norm": 0.5578299164772034,
4375
+ "learning_rate": 9.003331195555043e-06,
4376
+ "loss": 1.8077,
4377
+ "step": 624
4378
+ },
4379
+ {
4380
+ "epoch": 0.8085381630012937,
4381
+ "grad_norm": 0.5981864929199219,
4382
+ "learning_rate": 8.886589050511257e-06,
4383
+ "loss": 1.7781,
4384
+ "step": 625
4385
+ },
4386
+ {
4387
+ "epoch": 0.8098318240620958,
4388
+ "grad_norm": 0.5973100066184998,
4389
+ "learning_rate": 8.770534860748747e-06,
4390
+ "loss": 1.7344,
4391
+ "step": 626
4392
+ },
4393
+ {
4394
+ "epoch": 0.8111254851228978,
4395
+ "grad_norm": 0.5700523257255554,
4396
+ "learning_rate": 8.655170568215193e-06,
4397
+ "loss": 1.7169,
4398
+ "step": 627
4399
+ },
4400
+ {
4401
+ "epoch": 0.8124191461836999,
4402
+ "grad_norm": 0.6092631220817566,
4403
+ "learning_rate": 8.540498103314154e-06,
4404
+ "loss": 1.8428,
4405
+ "step": 628
4406
+ },
4407
+ {
4408
+ "epoch": 0.8137128072445019,
4409
+ "grad_norm": 0.6047289967536926,
4410
+ "learning_rate": 8.426519384872733e-06,
4411
+ "loss": 1.826,
4412
+ "step": 629
4413
+ },
4414
+ {
4415
+ "epoch": 0.815006468305304,
4416
+ "grad_norm": 0.6209927201271057,
4417
+ "learning_rate": 8.313236320109542e-06,
4418
+ "loss": 1.7561,
4419
+ "step": 630
4420
+ },
4421
+ {
4422
+ "epoch": 0.8163001293661061,
4423
+ "grad_norm": 0.5962238311767578,
4424
+ "learning_rate": 8.200650804602667e-06,
4425
+ "loss": 1.8062,
4426
+ "step": 631
4427
+ },
4428
+ {
4429
+ "epoch": 0.8175937904269082,
4430
+ "grad_norm": 0.6150239109992981,
4431
+ "learning_rate": 8.088764722258097e-06,
4432
+ "loss": 1.7115,
4433
+ "step": 632
4434
+ },
4435
+ {
4436
+ "epoch": 0.8188874514877102,
4437
+ "grad_norm": 0.6377602219581604,
4438
+ "learning_rate": 7.97757994527809e-06,
4439
+ "loss": 1.7661,
4440
+ "step": 633
4441
+ },
4442
+ {
4443
+ "epoch": 0.8201811125485123,
4444
+ "grad_norm": 0.6375808715820312,
4445
+ "learning_rate": 7.86709833412992e-06,
4446
+ "loss": 1.7545,
4447
+ "step": 634
4448
+ },
4449
+ {
4450
+ "epoch": 0.8214747736093143,
4451
+ "grad_norm": 0.641861081123352,
4452
+ "learning_rate": 7.757321737514645e-06,
4453
+ "loss": 1.8139,
4454
+ "step": 635
4455
+ },
4456
+ {
4457
+ "epoch": 0.8227684346701164,
4458
+ "grad_norm": 0.6656633019447327,
4459
+ "learning_rate": 7.648251992336291e-06,
4460
+ "loss": 1.7277,
4461
+ "step": 636
4462
+ },
4463
+ {
4464
+ "epoch": 0.8240620957309185,
4465
+ "grad_norm": 0.6750869154930115,
4466
+ "learning_rate": 7.539890923671062e-06,
4467
+ "loss": 1.946,
4468
+ "step": 637
4469
+ },
4470
+ {
4471
+ "epoch": 0.8253557567917206,
4472
+ "grad_norm": 0.6535232067108154,
4473
+ "learning_rate": 7.43224034473674e-06,
4474
+ "loss": 1.7854,
4475
+ "step": 638
4476
+ },
4477
+ {
4478
+ "epoch": 0.8266494178525227,
4479
+ "grad_norm": 0.6922054886817932,
4480
+ "learning_rate": 7.325302056862477e-06,
4481
+ "loss": 1.7684,
4482
+ "step": 639
4483
+ },
4484
+ {
4485
+ "epoch": 0.8279430789133247,
4486
+ "grad_norm": 0.6872656345367432,
4487
+ "learning_rate": 7.219077849458538e-06,
4488
+ "loss": 1.8425,
4489
+ "step": 640
4490
+ },
4491
+ {
4492
+ "epoch": 0.8292367399741267,
4493
+ "grad_norm": 0.7008704543113708,
4494
+ "learning_rate": 7.1135694999864e-06,
4495
+ "loss": 1.8705,
4496
+ "step": 641
4497
+ },
4498
+ {
4499
+ "epoch": 0.8305304010349288,
4500
+ "grad_norm": 0.729759156703949,
4501
+ "learning_rate": 7.00877877392902e-06,
4502
+ "loss": 1.6901,
4503
+ "step": 642
4504
+ },
4505
+ {
4506
+ "epoch": 0.8318240620957309,
4507
+ "grad_norm": 0.7461994886398315,
4508
+ "learning_rate": 6.904707424761292e-06,
4509
+ "loss": 1.9108,
4510
+ "step": 643
4511
+ },
4512
+ {
4513
+ "epoch": 0.833117723156533,
4514
+ "grad_norm": 0.769800066947937,
4515
+ "learning_rate": 6.801357193920665e-06,
4516
+ "loss": 2.1872,
4517
+ "step": 644
4518
+ },
4519
+ {
4520
+ "epoch": 0.8344113842173351,
4521
+ "grad_norm": 0.7531771659851074,
4522
+ "learning_rate": 6.698729810778065e-06,
4523
+ "loss": 1.8978,
4524
+ "step": 645
4525
+ },
4526
+ {
4527
+ "epoch": 0.8357050452781372,
4528
+ "grad_norm": 0.7885345220565796,
4529
+ "learning_rate": 6.596826992608929e-06,
4530
+ "loss": 1.9816,
4531
+ "step": 646
4532
+ },
4533
+ {
4534
+ "epoch": 0.8369987063389392,
4535
+ "grad_norm": 0.7793993353843689,
4536
+ "learning_rate": 6.495650444564433e-06,
4537
+ "loss": 1.8709,
4538
+ "step": 647
4539
+ },
4540
+ {
4541
+ "epoch": 0.8382923673997412,
4542
+ "grad_norm": 0.8853140473365784,
4543
+ "learning_rate": 6.3952018596430245e-06,
4544
+ "loss": 2.0212,
4545
+ "step": 648
4546
+ },
4547
+ {
4548
+ "epoch": 0.8395860284605433,
4549
+ "grad_norm": 0.8999887108802795,
4550
+ "learning_rate": 6.295482918662066e-06,
4551
+ "loss": 2.2873,
4552
+ "step": 649
4553
+ },
4554
+ {
4555
+ "epoch": 0.8408796895213454,
4556
+ "grad_norm": 0.9850187301635742,
4557
+ "learning_rate": 6.196495290229676e-06,
4558
+ "loss": 2.1436,
4559
+ "step": 650
4560
+ },
4561
+ {
4562
+ "epoch": 0.8421733505821475,
4563
+ "grad_norm": 0.4259641468524933,
4564
+ "learning_rate": 6.098240630716867e-06,
4565
+ "loss": 1.2801,
4566
+ "step": 651
4567
+ },
4568
+ {
4569
+ "epoch": 0.8434670116429496,
4570
+ "grad_norm": 0.46614962816238403,
4571
+ "learning_rate": 6.000720584229802e-06,
4572
+ "loss": 1.567,
4573
+ "step": 652
4574
+ },
4575
+ {
4576
+ "epoch": 0.8447606727037517,
4577
+ "grad_norm": 0.4704464077949524,
4578
+ "learning_rate": 5.903936782582253e-06,
4579
+ "loss": 1.8213,
4580
+ "step": 653
4581
+ },
4582
+ {
4583
+ "epoch": 0.8460543337645536,
4584
+ "grad_norm": 0.4731570780277252,
4585
+ "learning_rate": 5.80789084526836e-06,
4586
+ "loss": 1.6202,
4587
+ "step": 654
4588
+ },
4589
+ {
4590
+ "epoch": 0.8473479948253557,
4591
+ "grad_norm": 0.48283982276916504,
4592
+ "learning_rate": 5.712584379435482e-06,
4593
+ "loss": 1.5875,
4594
+ "step": 655
4595
+ },
4596
+ {
4597
+ "epoch": 0.8486416558861578,
4598
+ "grad_norm": 0.5061183571815491,
4599
+ "learning_rate": 5.618018979857309e-06,
4600
+ "loss": 1.5742,
4601
+ "step": 656
4602
+ },
4603
+ {
4604
+ "epoch": 0.8499353169469599,
4605
+ "grad_norm": 0.48858606815338135,
4606
+ "learning_rate": 5.524196228907203e-06,
4607
+ "loss": 1.5377,
4608
+ "step": 657
4609
+ },
4610
+ {
4611
+ "epoch": 0.851228978007762,
4612
+ "grad_norm": 0.48720577359199524,
4613
+ "learning_rate": 5.4311176965317065e-06,
4614
+ "loss": 1.5387,
4615
+ "step": 658
4616
+ },
4617
+ {
4618
+ "epoch": 0.8525226390685641,
4619
+ "grad_norm": 0.49719151854515076,
4620
+ "learning_rate": 5.33878494022424e-06,
4621
+ "loss": 1.6273,
4622
+ "step": 659
4623
+ },
4624
+ {
4625
+ "epoch": 0.8538163001293662,
4626
+ "grad_norm": 0.4926326274871826,
4627
+ "learning_rate": 5.247199504999106e-06,
4628
+ "loss": 1.7055,
4629
+ "step": 660
4630
+ },
4631
+ {
4632
+ "epoch": 0.8551099611901681,
4633
+ "grad_norm": 0.5013806819915771,
4634
+ "learning_rate": 5.156362923365588e-06,
4635
+ "loss": 1.6868,
4636
+ "step": 661
4637
+ },
4638
+ {
4639
+ "epoch": 0.8564036222509702,
4640
+ "grad_norm": 0.5549349784851074,
4641
+ "learning_rate": 5.066276715302304e-06,
4642
+ "loss": 1.571,
4643
+ "step": 662
4644
+ },
4645
+ {
4646
+ "epoch": 0.8576972833117723,
4647
+ "grad_norm": 0.5115381479263306,
4648
+ "learning_rate": 4.976942388231826e-06,
4649
+ "loss": 1.6374,
4650
+ "step": 663
4651
+ },
4652
+ {
4653
+ "epoch": 0.8589909443725744,
4654
+ "grad_norm": 0.5344304442405701,
4655
+ "learning_rate": 4.888361436995359e-06,
4656
+ "loss": 1.6862,
4657
+ "step": 664
4658
+ },
4659
+ {
4660
+ "epoch": 0.8602846054333765,
4661
+ "grad_norm": 0.5527788400650024,
4662
+ "learning_rate": 4.800535343827833e-06,
4663
+ "loss": 1.7234,
4664
+ "step": 665
4665
+ },
4666
+ {
4667
+ "epoch": 0.8615782664941786,
4668
+ "grad_norm": 0.5573310852050781,
4669
+ "learning_rate": 4.7134655783330425e-06,
4670
+ "loss": 1.6087,
4671
+ "step": 666
4672
+ },
4673
+ {
4674
+ "epoch": 0.8628719275549805,
4675
+ "grad_norm": 0.5454318523406982,
4676
+ "learning_rate": 4.627153597459072e-06,
4677
+ "loss": 1.6846,
4678
+ "step": 667
4679
+ },
4680
+ {
4681
+ "epoch": 0.8641655886157826,
4682
+ "grad_norm": 0.5258266925811768,
4683
+ "learning_rate": 4.541600845473881e-06,
4684
+ "loss": 1.7079,
4685
+ "step": 668
4686
+ },
4687
+ {
4688
+ "epoch": 0.8654592496765847,
4689
+ "grad_norm": 0.5511134266853333,
4690
+ "learning_rate": 4.456808753941205e-06,
4691
+ "loss": 1.6727,
4692
+ "step": 669
4693
+ },
4694
+ {
4695
+ "epoch": 0.8667529107373868,
4696
+ "grad_norm": 0.5851698517799377,
4697
+ "learning_rate": 4.372778741696559e-06,
4698
+ "loss": 1.8386,
4699
+ "step": 670
4700
+ },
4701
+ {
4702
+ "epoch": 0.8680465717981889,
4703
+ "grad_norm": 0.5556474328041077,
4704
+ "learning_rate": 4.289512214823466e-06,
4705
+ "loss": 1.6436,
4706
+ "step": 671
4707
+ },
4708
+ {
4709
+ "epoch": 0.869340232858991,
4710
+ "grad_norm": 0.5633500814437866,
4711
+ "learning_rate": 4.207010566630004e-06,
4712
+ "loss": 1.8678,
4713
+ "step": 672
4714
+ },
4715
+ {
4716
+ "epoch": 0.8706338939197931,
4717
+ "grad_norm": 0.5625820755958557,
4718
+ "learning_rate": 4.1252751776254375e-06,
4719
+ "loss": 1.6911,
4720
+ "step": 673
4721
+ },
4722
+ {
4723
+ "epoch": 0.871927554980595,
4724
+ "grad_norm": 0.607752799987793,
4725
+ "learning_rate": 4.044307415497112e-06,
4726
+ "loss": 1.8531,
4727
+ "step": 674
4728
+ },
4729
+ {
4730
+ "epoch": 0.8732212160413971,
4731
+ "grad_norm": 0.6022449731826782,
4732
+ "learning_rate": 3.964108635087615e-06,
4733
+ "loss": 1.9056,
4734
+ "step": 675
4735
+ },
4736
+ {
4737
+ "epoch": 0.8745148771021992,
4738
+ "grad_norm": 0.5852234363555908,
4739
+ "learning_rate": 3.884680178372069e-06,
4740
+ "loss": 1.7683,
4741
+ "step": 676
4742
+ },
4743
+ {
4744
+ "epoch": 0.8758085381630013,
4745
+ "grad_norm": 0.5821849703788757,
4746
+ "learning_rate": 3.8060233744356633e-06,
4747
+ "loss": 1.8066,
4748
+ "step": 677
4749
+ },
4750
+ {
4751
+ "epoch": 0.8771021992238034,
4752
+ "grad_norm": 0.5757661461830139,
4753
+ "learning_rate": 3.728139539451464e-06,
4754
+ "loss": 1.7934,
4755
+ "step": 678
4756
+ },
4757
+ {
4758
+ "epoch": 0.8783958602846055,
4759
+ "grad_norm": 0.6299412250518799,
4760
+ "learning_rate": 3.65102997665836e-06,
4761
+ "loss": 1.7985,
4762
+ "step": 679
4763
+ },
4764
+ {
4765
+ "epoch": 0.8796895213454075,
4766
+ "grad_norm": 0.6304367184638977,
4767
+ "learning_rate": 3.574695976339226e-06,
4768
+ "loss": 1.7298,
4769
+ "step": 680
4770
+ },
4771
+ {
4772
+ "epoch": 0.8809831824062095,
4773
+ "grad_norm": 0.5838976502418518,
4774
+ "learning_rate": 3.4991388157993966e-06,
4775
+ "loss": 1.7117,
4776
+ "step": 681
4777
+ },
4778
+ {
4779
+ "epoch": 0.8822768434670116,
4780
+ "grad_norm": 0.6452072262763977,
4781
+ "learning_rate": 3.4243597593452523e-06,
4782
+ "loss": 1.7692,
4783
+ "step": 682
4784
+ },
4785
+ {
4786
+ "epoch": 0.8835705045278137,
4787
+ "grad_norm": 0.6496413350105286,
4788
+ "learning_rate": 3.350360058263058e-06,
4789
+ "loss": 1.7692,
4790
+ "step": 683
4791
+ },
4792
+ {
4793
+ "epoch": 0.8848641655886158,
4794
+ "grad_norm": 0.6279682517051697,
4795
+ "learning_rate": 3.277140950798052e-06,
4796
+ "loss": 2.0296,
4797
+ "step": 684
4798
+ },
4799
+ {
4800
+ "epoch": 0.8861578266494179,
4801
+ "grad_norm": 0.6427062749862671,
4802
+ "learning_rate": 3.2047036621337236e-06,
4803
+ "loss": 1.6774,
4804
+ "step": 685
4805
+ },
4806
+ {
4807
+ "epoch": 0.88745148771022,
4808
+ "grad_norm": 0.6491368412971497,
4809
+ "learning_rate": 3.133049404371258e-06,
4810
+ "loss": 1.8565,
4811
+ "step": 686
4812
+ },
4813
+ {
4814
+ "epoch": 0.888745148771022,
4815
+ "grad_norm": 0.6646947860717773,
4816
+ "learning_rate": 3.0621793765093444e-06,
4817
+ "loss": 1.7678,
4818
+ "step": 687
4819
+ },
4820
+ {
4821
+ "epoch": 0.890038809831824,
4822
+ "grad_norm": 0.7040244936943054,
4823
+ "learning_rate": 2.9920947644240473e-06,
4824
+ "loss": 1.8575,
4825
+ "step": 688
4826
+ },
4827
+ {
4828
+ "epoch": 0.8913324708926261,
4829
+ "grad_norm": 0.6854454874992371,
4830
+ "learning_rate": 2.9227967408489653e-06,
4831
+ "loss": 1.7821,
4832
+ "step": 689
4833
+ },
4834
+ {
4835
+ "epoch": 0.8926261319534282,
4836
+ "grad_norm": 0.6900594234466553,
4837
+ "learning_rate": 2.8542864653556546e-06,
4838
+ "loss": 1.8255,
4839
+ "step": 690
4840
+ },
4841
+ {
4842
+ "epoch": 0.8939197930142303,
4843
+ "grad_norm": 0.6792859435081482,
4844
+ "learning_rate": 2.786565084334175e-06,
4845
+ "loss": 1.9492,
4846
+ "step": 691
4847
+ },
4848
+ {
4849
+ "epoch": 0.8952134540750324,
4850
+ "grad_norm": 0.6839621663093567,
4851
+ "learning_rate": 2.7196337309739417e-06,
4852
+ "loss": 1.8545,
4853
+ "step": 692
4854
+ },
4855
+ {
4856
+ "epoch": 0.8965071151358344,
4857
+ "grad_norm": 0.7339411377906799,
4858
+ "learning_rate": 2.653493525244721e-06,
4859
+ "loss": 1.9469,
4860
+ "step": 693
4861
+ },
4862
+ {
4863
+ "epoch": 0.8978007761966365,
4864
+ "grad_norm": 0.7272126078605652,
4865
+ "learning_rate": 2.5881455738779483e-06,
4866
+ "loss": 1.9897,
4867
+ "step": 694
4868
+ },
4869
+ {
4870
+ "epoch": 0.8990944372574385,
4871
+ "grad_norm": 0.7714767456054688,
4872
+ "learning_rate": 2.5235909703481665e-06,
4873
+ "loss": 1.9615,
4874
+ "step": 695
4875
+ },
4876
+ {
4877
+ "epoch": 0.9003880983182406,
4878
+ "grad_norm": 0.7937828302383423,
4879
+ "learning_rate": 2.4598307948547395e-06,
4880
+ "loss": 2.0227,
4881
+ "step": 696
4882
+ },
4883
+ {
4884
+ "epoch": 0.9016817593790427,
4885
+ "grad_norm": 0.837083637714386,
4886
+ "learning_rate": 2.3968661143037862e-06,
4887
+ "loss": 1.8228,
4888
+ "step": 697
4889
+ },
4890
+ {
4891
+ "epoch": 0.9029754204398448,
4892
+ "grad_norm": 0.7950493097305298,
4893
+ "learning_rate": 2.334697982290307e-06,
4894
+ "loss": 1.8966,
4895
+ "step": 698
4896
+ },
4897
+ {
4898
+ "epoch": 0.9042690815006468,
4899
+ "grad_norm": 0.9044060707092285,
4900
+ "learning_rate": 2.273327439080575e-06,
4901
+ "loss": 1.9586,
4902
+ "step": 699
4903
+ },
4904
+ {
4905
+ "epoch": 0.9055627425614489,
4906
+ "grad_norm": 1.0963821411132812,
4907
+ "learning_rate": 2.2127555115947276e-06,
4908
+ "loss": 2.0201,
4909
+ "step": 700
4910
+ },
4911
+ {
4912
+ "epoch": 0.906856403622251,
4913
+ "grad_norm": 0.42113515734672546,
4914
+ "learning_rate": 2.152983213389559e-06,
4915
+ "loss": 1.4756,
4916
+ "step": 701
4917
+ },
4918
+ {
4919
+ "epoch": 0.908150064683053,
4920
+ "grad_norm": 0.47029221057891846,
4921
+ "learning_rate": 2.0940115446415886e-06,
4922
+ "loss": 1.5055,
4923
+ "step": 702
4924
+ },
4925
+ {
4926
+ "epoch": 0.9094437257438551,
4927
+ "grad_norm": 0.4599394202232361,
4928
+ "learning_rate": 2.035841492130319e-06,
4929
+ "loss": 1.7587,
4930
+ "step": 703
4931
+ },
4932
+ {
4933
+ "epoch": 0.9107373868046572,
4934
+ "grad_norm": 0.4609547257423401,
4935
+ "learning_rate": 1.9784740292217108e-06,
4936
+ "loss": 1.5761,
4937
+ "step": 704
4938
+ },
4939
+ {
4940
+ "epoch": 0.9120310478654593,
4941
+ "grad_norm": 0.4895348846912384,
4942
+ "learning_rate": 1.9219101158518993e-06,
4943
+ "loss": 1.7727,
4944
+ "step": 705
4945
+ },
4946
+ {
4947
+ "epoch": 0.9133247089262613,
4948
+ "grad_norm": 0.48977532982826233,
4949
+ "learning_rate": 1.86615069851116e-06,
4950
+ "loss": 1.6288,
4951
+ "step": 706
4952
+ },
4953
+ {
4954
+ "epoch": 0.9146183699870634,
4955
+ "grad_norm": 0.4814504384994507,
4956
+ "learning_rate": 1.811196710228008e-06,
4957
+ "loss": 1.6022,
4958
+ "step": 707
4959
+ },
4960
+ {
4961
+ "epoch": 0.9159120310478654,
4962
+ "grad_norm": 0.5106490254402161,
4963
+ "learning_rate": 1.757049070553657e-06,
4964
+ "loss": 1.6077,
4965
+ "step": 708
4966
+ },
4967
+ {
4968
+ "epoch": 0.9172056921086675,
4969
+ "grad_norm": 0.5112571716308594,
4970
+ "learning_rate": 1.70370868554659e-06,
4971
+ "loss": 1.4259,
4972
+ "step": 709
4973
+ },
4974
+ {
4975
+ "epoch": 0.9184993531694696,
4976
+ "grad_norm": 0.49610233306884766,
4977
+ "learning_rate": 1.6511764477573965e-06,
4978
+ "loss": 1.5944,
4979
+ "step": 710
4980
+ },
4981
+ {
4982
+ "epoch": 0.9197930142302717,
4983
+ "grad_norm": 0.514818549156189,
4984
+ "learning_rate": 1.599453236213866e-06,
4985
+ "loss": 1.4695,
4986
+ "step": 711
4987
+ },
4988
+ {
4989
+ "epoch": 0.9210866752910737,
4990
+ "grad_norm": 0.5288085341453552,
4991
+ "learning_rate": 1.5485399164062552e-06,
4992
+ "loss": 1.6117,
4993
+ "step": 712
4994
+ },
4995
+ {
4996
+ "epoch": 0.9223803363518758,
4997
+ "grad_norm": 0.5362600088119507,
4998
+ "learning_rate": 1.4984373402728014e-06,
4999
+ "loss": 1.5812,
5000
+ "step": 713
5001
+ },
5002
+ {
5003
+ "epoch": 0.9236739974126779,
5004
+ "grad_norm": 0.5225064754486084,
5005
+ "learning_rate": 1.4491463461854981e-06,
5006
+ "loss": 1.6608,
5007
+ "step": 714
5008
+ },
5009
+ {
5010
+ "epoch": 0.92496765847348,
5011
+ "grad_norm": 0.5195352435112,
5012
+ "learning_rate": 1.4006677589360306e-06,
5013
+ "loss": 1.7514,
5014
+ "step": 715
5015
+ },
5016
+ {
5017
+ "epoch": 0.926261319534282,
5018
+ "grad_norm": 0.5668680667877197,
5019
+ "learning_rate": 1.3530023897219968e-06,
5020
+ "loss": 1.7045,
5021
+ "step": 716
5022
+ },
5023
+ {
5024
+ "epoch": 0.9275549805950841,
5025
+ "grad_norm": 0.5474237203598022,
5026
+ "learning_rate": 1.3061510361333185e-06,
5027
+ "loss": 1.7969,
5028
+ "step": 717
5029
+ },
5030
+ {
5031
+ "epoch": 0.9288486416558862,
5032
+ "grad_norm": 0.556367814540863,
5033
+ "learning_rate": 1.2601144821389187e-06,
5034
+ "loss": 1.6464,
5035
+ "step": 718
5036
+ },
5037
+ {
5038
+ "epoch": 0.9301423027166882,
5039
+ "grad_norm": 0.5793610215187073,
5040
+ "learning_rate": 1.214893498073577e-06,
5041
+ "loss": 1.7267,
5042
+ "step": 719
5043
+ },
5044
+ {
5045
+ "epoch": 0.9314359637774903,
5046
+ "grad_norm": 0.5605274438858032,
5047
+ "learning_rate": 1.1704888406250447e-06,
5048
+ "loss": 1.6689,
5049
+ "step": 720
5050
+ },
5051
+ {
5052
+ "epoch": 0.9327296248382924,
5053
+ "grad_norm": 0.559226930141449,
5054
+ "learning_rate": 1.1269012528214107e-06,
5055
+ "loss": 1.7192,
5056
+ "step": 721
5057
+ },
5058
+ {
5059
+ "epoch": 0.9340232858990944,
5060
+ "grad_norm": 0.5833441019058228,
5061
+ "learning_rate": 1.0841314640186228e-06,
5062
+ "loss": 1.6467,
5063
+ "step": 722
5064
+ },
5065
+ {
5066
+ "epoch": 0.9353169469598965,
5067
+ "grad_norm": 0.614727795124054,
5068
+ "learning_rate": 1.0421801898883143e-06,
5069
+ "loss": 1.7859,
5070
+ "step": 723
5071
+ },
5072
+ {
5073
+ "epoch": 0.9366106080206986,
5074
+ "grad_norm": 0.5924329161643982,
5075
+ "learning_rate": 1.0010481324058352e-06,
5076
+ "loss": 1.6152,
5077
+ "step": 724
5078
+ },
5079
+ {
5080
+ "epoch": 0.9379042690815006,
5081
+ "grad_norm": 0.5784980654716492,
5082
+ "learning_rate": 9.607359798384785e-07,
5083
+ "loss": 1.7243,
5084
+ "step": 725
5085
+ },
5086
+ {
5087
+ "epoch": 0.9391979301423027,
5088
+ "grad_norm": 0.5997685790061951,
5089
+ "learning_rate": 9.212444067339787e-07,
5090
+ "loss": 1.7772,
5091
+ "step": 726
5092
+ },
5093
+ {
5094
+ "epoch": 0.9404915912031048,
5095
+ "grad_norm": 0.6061770915985107,
5096
+ "learning_rate": 8.825740739092148e-07,
5097
+ "loss": 1.9133,
5098
+ "step": 727
5099
+ },
5100
+ {
5101
+ "epoch": 0.9417852522639069,
5102
+ "grad_norm": 0.5751582980155945,
5103
+ "learning_rate": 8.447256284391858e-07,
5104
+ "loss": 1.6525,
5105
+ "step": 728
5106
+ },
5107
+ {
5108
+ "epoch": 0.943078913324709,
5109
+ "grad_norm": 0.6227949857711792,
5110
+ "learning_rate": 8.076997036461253e-07,
5111
+ "loss": 1.805,
5112
+ "step": 729
5113
+ },
5114
+ {
5115
+ "epoch": 0.944372574385511,
5116
+ "grad_norm": 0.6226868033409119,
5117
+ "learning_rate": 7.714969190889765e-07,
5118
+ "loss": 1.9385,
5119
+ "step": 730
5120
+ },
5121
+ {
5122
+ "epoch": 0.9456662354463131,
5123
+ "grad_norm": 0.5905411243438721,
5124
+ "learning_rate": 7.36117880552939e-07,
5125
+ "loss": 1.7853,
5126
+ "step": 731
5127
+ },
5128
+ {
5129
+ "epoch": 0.9469598965071151,
5130
+ "grad_norm": 0.6376669406890869,
5131
+ "learning_rate": 7.015631800394107e-07,
5132
+ "loss": 2.0066,
5133
+ "step": 732
5134
+ },
5135
+ {
5136
+ "epoch": 0.9482535575679172,
5137
+ "grad_norm": 0.6215546727180481,
5138
+ "learning_rate": 6.678333957560512e-07,
5139
+ "loss": 1.7759,
5140
+ "step": 733
5141
+ },
5142
+ {
5143
+ "epoch": 0.9495472186287193,
5144
+ "grad_norm": 0.6270310282707214,
5145
+ "learning_rate": 6.349290921070783e-07,
5146
+ "loss": 1.8527,
5147
+ "step": 734
5148
+ },
5149
+ {
5150
+ "epoch": 0.9508408796895214,
5151
+ "grad_norm": 0.6710889935493469,
5152
+ "learning_rate": 6.028508196838811e-07,
5153
+ "loss": 1.863,
5154
+ "step": 735
5155
+ },
5156
+ {
5157
+ "epoch": 0.9521345407503234,
5158
+ "grad_norm": 0.6601763367652893,
5159
+ "learning_rate": 5.715991152557554e-07,
5160
+ "loss": 1.9286,
5161
+ "step": 736
5162
+ },
5163
+ {
5164
+ "epoch": 0.9534282018111255,
5165
+ "grad_norm": 0.6637079119682312,
5166
+ "learning_rate": 5.411745017609493e-07,
5167
+ "loss": 1.9146,
5168
+ "step": 737
5169
+ },
5170
+ {
5171
+ "epoch": 0.9547218628719275,
5172
+ "grad_norm": 0.6605135798454285,
5173
+ "learning_rate": 5.115774882979096e-07,
5174
+ "loss": 1.7605,
5175
+ "step": 738
5176
+ },
5177
+ {
5178
+ "epoch": 0.9560155239327296,
5179
+ "grad_norm": 0.6722599864006042,
5180
+ "learning_rate": 4.828085701167607e-07,
5181
+ "loss": 1.8122,
5182
+ "step": 739
5183
+ },
5184
+ {
5185
+ "epoch": 0.9573091849935317,
5186
+ "grad_norm": 0.6760243773460388,
5187
+ "learning_rate": 4.548682286109995e-07,
5188
+ "loss": 1.6851,
5189
+ "step": 740
5190
+ },
5191
+ {
5192
+ "epoch": 0.9586028460543338,
5193
+ "grad_norm": 0.6736776232719421,
5194
+ "learning_rate": 4.277569313094809e-07,
5195
+ "loss": 1.786,
5196
+ "step": 741
5197
+ },
5198
+ {
5199
+ "epoch": 0.9598965071151359,
5200
+ "grad_norm": 0.7444686889648438,
5201
+ "learning_rate": 4.0147513186855615e-07,
5202
+ "loss": 1.9045,
5203
+ "step": 742
5204
+ },
5205
+ {
5206
+ "epoch": 0.9611901681759379,
5207
+ "grad_norm": 0.7153714299201965,
5208
+ "learning_rate": 3.7602327006450167e-07,
5209
+ "loss": 2.0627,
5210
+ "step": 743
5211
+ },
5212
+ {
5213
+ "epoch": 0.96248382923674,
5214
+ "grad_norm": 0.7691993117332458,
5215
+ "learning_rate": 3.514017717861529e-07,
5216
+ "loss": 2.0108,
5217
+ "step": 744
5218
+ },
5219
+ {
5220
+ "epoch": 0.963777490297542,
5221
+ "grad_norm": 0.7638096809387207,
5222
+ "learning_rate": 3.2761104902778173e-07,
5223
+ "loss": 1.9658,
5224
+ "step": 745
5225
+ },
5226
+ {
5227
+ "epoch": 0.9650711513583441,
5228
+ "grad_norm": 0.7966986894607544,
5229
+ "learning_rate": 3.04651499882197e-07,
5230
+ "loss": 2.1942,
5231
+ "step": 746
5232
+ },
5233
+ {
5234
+ "epoch": 0.9663648124191462,
5235
+ "grad_norm": 0.794858455657959,
5236
+ "learning_rate": 2.825235085340938e-07,
5237
+ "loss": 1.936,
5238
+ "step": 747
5239
+ },
5240
+ {
5241
+ "epoch": 0.9676584734799483,
5242
+ "grad_norm": 0.858466386795044,
5243
+ "learning_rate": 2.612274452536201e-07,
5244
+ "loss": 1.9205,
5245
+ "step": 748
5246
+ },
5247
+ {
5248
+ "epoch": 0.9689521345407504,
5249
+ "grad_norm": 1.1012519598007202,
5250
+ "learning_rate": 2.407636663901591e-07,
5251
+ "loss": 2.1395,
5252
+ "step": 749
5253
+ },
5254
+ {
5255
+ "epoch": 0.9702457956015524,
5256
+ "grad_norm": 1.0824167728424072,
5257
+ "learning_rate": 2.211325143664067e-07,
5258
+ "loss": 2.106,
5259
+ "step": 750
5260
+ },
5261
+ {
5262
+ "epoch": 0.9715394566623544,
5263
+ "grad_norm": 0.4427192211151123,
5264
+ "learning_rate": 2.0233431767261447e-07,
5265
+ "loss": 1.7156,
5266
+ "step": 751
5267
+ },
5268
+ {
5269
+ "epoch": 0.9728331177231565,
5270
+ "grad_norm": 0.4759114384651184,
5271
+ "learning_rate": 1.8436939086109995e-07,
5272
+ "loss": 1.5656,
5273
+ "step": 752
5274
+ },
5275
+ {
5276
+ "epoch": 0.9741267787839586,
5277
+ "grad_norm": 0.48882219195365906,
5278
+ "learning_rate": 1.6723803454098408e-07,
5279
+ "loss": 1.6891,
5280
+ "step": 753
5281
+ },
5282
+ {
5283
+ "epoch": 0.9754204398447607,
5284
+ "grad_norm": 0.48134151101112366,
5285
+ "learning_rate": 1.5094053537316188e-07,
5286
+ "loss": 1.4881,
5287
+ "step": 754
5288
+ },
5289
+ {
5290
+ "epoch": 0.9767141009055628,
5291
+ "grad_norm": 0.528454601764679,
5292
+ "learning_rate": 1.3547716606548966e-07,
5293
+ "loss": 1.4452,
5294
+ "step": 755
5295
+ },
5296
+ {
5297
+ "epoch": 0.9780077619663649,
5298
+ "grad_norm": 0.5208367109298706,
5299
+ "learning_rate": 1.2084818536825526e-07,
5300
+ "loss": 1.4119,
5301
+ "step": 756
5302
+ },
5303
+ {
5304
+ "epoch": 0.9793014230271668,
5305
+ "grad_norm": 0.5646911859512329,
5306
+ "learning_rate": 1.0705383806982606e-07,
5307
+ "loss": 1.6763,
5308
+ "step": 757
5309
+ },
5310
+ {
5311
+ "epoch": 0.9805950840879689,
5312
+ "grad_norm": 0.531313955783844,
5313
+ "learning_rate": 9.409435499254104e-08,
5314
+ "loss": 1.6581,
5315
+ "step": 758
5316
+ },
5317
+ {
5318
+ "epoch": 0.981888745148771,
5319
+ "grad_norm": 0.5821428894996643,
5320
+ "learning_rate": 8.196995298887511e-08,
5321
+ "loss": 1.7181,
5322
+ "step": 759
5323
+ },
5324
+ {
5325
+ "epoch": 0.9831824062095731,
5326
+ "grad_norm": 0.5947988033294678,
5327
+ "learning_rate": 7.068083493779743e-08,
5328
+ "loss": 1.8464,
5329
+ "step": 760
5330
+ },
5331
+ {
5332
+ "epoch": 0.9844760672703752,
5333
+ "grad_norm": 0.5871140956878662,
5334
+ "learning_rate": 6.022718974137975e-08,
5335
+ "loss": 1.7817,
5336
+ "step": 761
5337
+ },
5338
+ {
5339
+ "epoch": 0.9857697283311773,
5340
+ "grad_norm": 0.6064710021018982,
5341
+ "learning_rate": 5.060919232162675e-08,
5342
+ "loss": 1.6367,
5343
+ "step": 762
5344
+ },
5345
+ {
5346
+ "epoch": 0.9870633893919794,
5347
+ "grad_norm": 0.613859236240387,
5348
+ "learning_rate": 4.182700361756164e-08,
5349
+ "loss": 1.8499,
5350
+ "step": 763
5351
+ },
5352
+ {
5353
+ "epoch": 0.9883570504527813,
5354
+ "grad_norm": 0.6105691194534302,
5355
+ "learning_rate": 3.388077058252281e-08,
5356
+ "loss": 1.7682,
5357
+ "step": 764
5358
+ },
5359
+ {
5360
+ "epoch": 0.9896507115135834,
5361
+ "grad_norm": 0.6665717959403992,
5362
+ "learning_rate": 2.6770626181715773e-08,
5363
+ "loss": 1.8694,
5364
+ "step": 765
5365
+ },
5366
+ {
5367
+ "epoch": 0.9909443725743855,
5368
+ "grad_norm": 0.6197919249534607,
5369
+ "learning_rate": 2.0496689389976065e-08,
5370
+ "loss": 1.8789,
5371
+ "step": 766
5372
+ },
5373
+ {
5374
+ "epoch": 0.9922380336351876,
5375
+ "grad_norm": 0.6365950107574463,
5376
+ "learning_rate": 1.5059065189787503e-08,
5377
+ "loss": 1.8171,
5378
+ "step": 767
5379
+ },
5380
+ {
5381
+ "epoch": 0.9935316946959897,
5382
+ "grad_norm": 0.6993787884712219,
5383
+ "learning_rate": 1.045784456952248e-08,
5384
+ "loss": 2.0328,
5385
+ "step": 768
5386
+ },
5387
+ {
5388
+ "epoch": 0.9948253557567918,
5389
+ "grad_norm": 0.7297689318656921,
5390
+ "learning_rate": 6.693104521909854e-09,
5391
+ "loss": 2.0254,
5392
+ "step": 769
5393
+ },
5394
+ {
5395
+ "epoch": 0.9961190168175937,
5396
+ "grad_norm": 0.7178072333335876,
5397
+ "learning_rate": 3.764908042774851e-09,
5398
+ "loss": 1.8857,
5399
+ "step": 770
5400
+ },
5401
+ {
5402
+ "epoch": 0.9974126778783958,
5403
+ "grad_norm": 0.7650882005691528,
5404
+ "learning_rate": 1.6733041299454855e-09,
5405
+ "loss": 1.8953,
5406
+ "step": 771
5407
+ },
5408
+ {
5409
+ "epoch": 0.9987063389391979,
5410
+ "grad_norm": 0.9221259951591492,
5411
+ "learning_rate": 4.183277824698628e-10,
5412
+ "loss": 1.9443,
5413
+ "step": 772
5414
+ },
5415
+ {
5416
+ "epoch": 1.0,
5417
+ "grad_norm": 1.0081813335418701,
5418
+ "learning_rate": 0.0,
5419
+ "loss": 2.1013,
5420
+ "step": 773
5421
  }
5422
  ],
5423
  "logging_steps": 1,
 
5432
  "should_evaluate": false,
5433
  "should_log": false,
5434
  "should_save": true,
5435
+ "should_training_stop": true
5436
  },
5437
  "attributes": {}
5438
  }
5439
  },
5440
+ "total_flos": 5.3869236774764544e+17,
5441
  "train_batch_size": 4,
5442
  "trial_name": null,
5443
  "trial_params": null