magatex commited on
Commit
3f36f31
·
verified ·
1 Parent(s): a3bc785

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c3c33e8a4a8ed43dc3420bd1386f3c762dc8aac9f1b9d6abc028e43a8a65191
3
  size 13587864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4899466220e60a7be496bec2b0702ec77d7b42f7c2abb9c89d5990de66858d42
3
  size 13587864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df04fbad8806984f9879dd0372c5fd9dec6a52cd99cb407e7ce66b7b1b2d44db
3
  size 27273018
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04f03abccf9919cb7e6cb74b0991c90780af63ba8366b3ce90419ba802cfed1e
3
  size 27273018
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7950f08aabef52e7e1944935a980baf99b7b3f4d93fe07a39564c7e2a0b20d9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3464f03cc2695b7e00d9a022811a7d39834c49f04402f78117f949150b3e4b68
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a42c6236f225c51c1bc182b25f6928e0a7938d42d43dbe4f5aeb61527d641f0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c10b97f44aaa8e8a1f8b6b752fbcc49ead2ee866b2143ac6a7831438a80daac
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.004960117861628532,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-450",
4
- "epoch": 1.8051282051282052,
5
  "eval_steps": 25,
6
- "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4041,6 +4041,372 @@
4041
  "eval_samples_per_second": 47.487,
4042
  "eval_steps_per_second": 47.487,
4043
  "step": 550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4044
  }
4045
  ],
4046
  "logging_steps": 1,
@@ -4055,7 +4421,7 @@
4055
  "early_stopping_threshold": 0.0
4056
  },
4057
  "attributes": {
4058
- "early_stopping_patience_counter": 1
4059
  }
4060
  },
4061
  "TrainerControl": {
@@ -4064,12 +4430,12 @@
4064
  "should_evaluate": false,
4065
  "should_log": false,
4066
  "should_save": true,
4067
- "should_training_stop": false
4068
  },
4069
  "attributes": {}
4070
  }
4071
  },
4072
- "total_flos": 3.3508451401334784e+16,
4073
  "train_batch_size": 1,
4074
  "trial_name": null,
4075
  "trial_params": null
 
1
  {
2
  "best_metric": 0.004960117861628532,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-450",
4
+ "epoch": 1.9692307692307693,
5
  "eval_steps": 25,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4041
  "eval_samples_per_second": 47.487,
4042
  "eval_steps_per_second": 47.487,
4043
  "step": 550
4044
+ },
4045
+ {
4046
+ "epoch": 1.8084102564102564,
4047
+ "grad_norm": 0.0034276428632438183,
4048
+ "learning_rate": 2.3195015539906243e-05,
4049
+ "loss": 0.0049,
4050
+ "step": 551
4051
+ },
4052
+ {
4053
+ "epoch": 1.8116923076923077,
4054
+ "grad_norm": 0.0030685942620038986,
4055
+ "learning_rate": 2.3066675633983865e-05,
4056
+ "loss": 0.0048,
4057
+ "step": 552
4058
+ },
4059
+ {
4060
+ "epoch": 1.814974358974359,
4061
+ "grad_norm": 0.0046894908882677555,
4062
+ "learning_rate": 2.2940921796353956e-05,
4063
+ "loss": 0.0047,
4064
+ "step": 553
4065
+ },
4066
+ {
4067
+ "epoch": 1.8182564102564103,
4068
+ "grad_norm": 0.0035674276296049356,
4069
+ "learning_rate": 2.2817757767906625e-05,
4070
+ "loss": 0.0048,
4071
+ "step": 554
4072
+ },
4073
+ {
4074
+ "epoch": 1.8215384615384616,
4075
+ "grad_norm": 0.005269620567560196,
4076
+ "learning_rate": 2.2697187212491044e-05,
4077
+ "loss": 0.0051,
4078
+ "step": 555
4079
+ },
4080
+ {
4081
+ "epoch": 1.8248205128205128,
4082
+ "grad_norm": 0.008738451637327671,
4083
+ "learning_rate": 2.2579213716806474e-05,
4084
+ "loss": 0.0052,
4085
+ "step": 556
4086
+ },
4087
+ {
4088
+ "epoch": 1.828102564102564,
4089
+ "grad_norm": 0.008472139947116375,
4090
+ "learning_rate": 2.2463840790295566e-05,
4091
+ "loss": 0.0051,
4092
+ "step": 557
4093
+ },
4094
+ {
4095
+ "epoch": 1.8313846153846154,
4096
+ "grad_norm": 0.008605373091995716,
4097
+ "learning_rate": 2.2351071865039974e-05,
4098
+ "loss": 0.0051,
4099
+ "step": 558
4100
+ },
4101
+ {
4102
+ "epoch": 1.8346666666666667,
4103
+ "grad_norm": 0.02175315096974373,
4104
+ "learning_rate": 2.224091029565824e-05,
4105
+ "loss": 0.0053,
4106
+ "step": 559
4107
+ },
4108
+ {
4109
+ "epoch": 1.837948717948718,
4110
+ "grad_norm": 0.008465359918773174,
4111
+ "learning_rate": 2.2133359359206e-05,
4112
+ "loss": 0.0052,
4113
+ "step": 560
4114
+ },
4115
+ {
4116
+ "epoch": 1.8412307692307692,
4117
+ "grad_norm": 0.007232977543026209,
4118
+ "learning_rate": 2.2028422255078542e-05,
4119
+ "loss": 0.0052,
4120
+ "step": 561
4121
+ },
4122
+ {
4123
+ "epoch": 1.8445128205128205,
4124
+ "grad_norm": 0.007051311433315277,
4125
+ "learning_rate": 2.1926102104915553e-05,
4126
+ "loss": 0.0051,
4127
+ "step": 562
4128
+ },
4129
+ {
4130
+ "epoch": 1.8477948717948718,
4131
+ "grad_norm": 0.006151077803224325,
4132
+ "learning_rate": 2.182640195250835e-05,
4133
+ "loss": 0.005,
4134
+ "step": 563
4135
+ },
4136
+ {
4137
+ "epoch": 1.851076923076923,
4138
+ "grad_norm": 0.006573867984116077,
4139
+ "learning_rate": 2.1729324763709264e-05,
4140
+ "loss": 0.0051,
4141
+ "step": 564
4142
+ },
4143
+ {
4144
+ "epoch": 1.8543589743589743,
4145
+ "grad_norm": 0.00678396737203002,
4146
+ "learning_rate": 2.1634873426343427e-05,
4147
+ "loss": 0.0049,
4148
+ "step": 565
4149
+ },
4150
+ {
4151
+ "epoch": 1.8576410256410256,
4152
+ "grad_norm": 0.005578219890594482,
4153
+ "learning_rate": 2.1543050750122902e-05,
4154
+ "loss": 0.0048,
4155
+ "step": 566
4156
+ },
4157
+ {
4158
+ "epoch": 1.860923076923077,
4159
+ "grad_norm": 0.0040833973325788975,
4160
+ "learning_rate": 2.145385946656303e-05,
4161
+ "loss": 0.0047,
4162
+ "step": 567
4163
+ },
4164
+ {
4165
+ "epoch": 1.8642051282051282,
4166
+ "grad_norm": 0.004177347291260958,
4167
+ "learning_rate": 2.1367302228901282e-05,
4168
+ "loss": 0.0046,
4169
+ "step": 568
4170
+ },
4171
+ {
4172
+ "epoch": 1.8674871794871795,
4173
+ "grad_norm": 0.0036663906648755074,
4174
+ "learning_rate": 2.128338161201819e-05,
4175
+ "loss": 0.0047,
4176
+ "step": 569
4177
+ },
4178
+ {
4179
+ "epoch": 1.8707692307692307,
4180
+ "grad_norm": 0.003597427159547806,
4181
+ "learning_rate": 2.1202100112360894e-05,
4182
+ "loss": 0.0048,
4183
+ "step": 570
4184
+ },
4185
+ {
4186
+ "epoch": 1.874051282051282,
4187
+ "grad_norm": 0.0029398370534181595,
4188
+ "learning_rate": 2.1123460147868763e-05,
4189
+ "loss": 0.0048,
4190
+ "step": 571
4191
+ },
4192
+ {
4193
+ "epoch": 1.8773333333333333,
4194
+ "grad_norm": 0.003072077641263604,
4195
+ "learning_rate": 2.1047464057901542e-05,
4196
+ "loss": 0.0048,
4197
+ "step": 572
4198
+ },
4199
+ {
4200
+ "epoch": 1.8806153846153846,
4201
+ "grad_norm": 0.002605011221021414,
4202
+ "learning_rate": 2.0974114103169712e-05,
4203
+ "loss": 0.0048,
4204
+ "step": 573
4205
+ },
4206
+ {
4207
+ "epoch": 1.8838974358974359,
4208
+ "grad_norm": 0.002371675567701459,
4209
+ "learning_rate": 2.0903412465667293e-05,
4210
+ "loss": 0.0047,
4211
+ "step": 574
4212
+ },
4213
+ {
4214
+ "epoch": 1.8871794871794871,
4215
+ "grad_norm": 0.002911495743319392,
4216
+ "learning_rate": 2.0835361248606867e-05,
4217
+ "loss": 0.0047,
4218
+ "step": 575
4219
+ },
4220
+ {
4221
+ "epoch": 1.8871794871794871,
4222
+ "eval_loss": 0.0050178528763353825,
4223
+ "eval_runtime": 1.0828,
4224
+ "eval_samples_per_second": 46.176,
4225
+ "eval_steps_per_second": 46.176,
4226
+ "step": 575
4227
+ },
4228
+ {
4229
+ "epoch": 1.8904615384615384,
4230
+ "grad_norm": 0.0025259945541620255,
4231
+ "learning_rate": 2.0769962476357068e-05,
4232
+ "loss": 0.0047,
4233
+ "step": 576
4234
+ },
4235
+ {
4236
+ "epoch": 1.8937435897435897,
4237
+ "grad_norm": 0.0023200158029794693,
4238
+ "learning_rate": 2.070721809438233e-05,
4239
+ "loss": 0.0047,
4240
+ "step": 577
4241
+ },
4242
+ {
4243
+ "epoch": 1.897025641025641,
4244
+ "grad_norm": 0.0023292931728065014,
4245
+ "learning_rate": 2.0647129969185046e-05,
4246
+ "loss": 0.0048,
4247
+ "step": 578
4248
+ },
4249
+ {
4250
+ "epoch": 1.9003076923076923,
4251
+ "grad_norm": 0.0025951117277145386,
4252
+ "learning_rate": 2.058969988825001e-05,
4253
+ "loss": 0.0047,
4254
+ "step": 579
4255
+ },
4256
+ {
4257
+ "epoch": 1.9035897435897438,
4258
+ "grad_norm": 0.0026415924075990915,
4259
+ "learning_rate": 2.0534929559991233e-05,
4260
+ "loss": 0.0047,
4261
+ "step": 580
4262
+ },
4263
+ {
4264
+ "epoch": 1.9068717948717948,
4265
+ "grad_norm": 0.0020874382462352514,
4266
+ "learning_rate": 2.0482820613701192e-05,
4267
+ "loss": 0.0046,
4268
+ "step": 581
4269
+ },
4270
+ {
4271
+ "epoch": 1.9101538461538463,
4272
+ "grad_norm": 0.002052360912784934,
4273
+ "learning_rate": 2.043337459950229e-05,
4274
+ "loss": 0.0046,
4275
+ "step": 582
4276
+ },
4277
+ {
4278
+ "epoch": 1.9134358974358974,
4279
+ "grad_norm": 0.0021120973397046328,
4280
+ "learning_rate": 2.0386592988300747e-05,
4281
+ "loss": 0.0046,
4282
+ "step": 583
4283
+ },
4284
+ {
4285
+ "epoch": 1.9167179487179489,
4286
+ "grad_norm": 0.0021454044617712498,
4287
+ "learning_rate": 2.03424771717429e-05,
4288
+ "loss": 0.0047,
4289
+ "step": 584
4290
+ },
4291
+ {
4292
+ "epoch": 1.92,
4293
+ "grad_norm": 0.0023362315259873867,
4294
+ "learning_rate": 2.0301028462173774e-05,
4295
+ "loss": 0.0048,
4296
+ "step": 585
4297
+ },
4298
+ {
4299
+ "epoch": 1.9232820512820514,
4300
+ "grad_norm": 0.002209689933806658,
4301
+ "learning_rate": 2.0262248092598006e-05,
4302
+ "loss": 0.0048,
4303
+ "step": 586
4304
+ },
4305
+ {
4306
+ "epoch": 1.9265641025641025,
4307
+ "grad_norm": 0.0022381660528481007,
4308
+ "learning_rate": 2.0226137216643222e-05,
4309
+ "loss": 0.0048,
4310
+ "step": 587
4311
+ },
4312
+ {
4313
+ "epoch": 1.929846153846154,
4314
+ "grad_norm": 0.002202109433710575,
4315
+ "learning_rate": 2.019269690852569e-05,
4316
+ "loss": 0.0047,
4317
+ "step": 588
4318
+ },
4319
+ {
4320
+ "epoch": 1.933128205128205,
4321
+ "grad_norm": 0.0021981867030262947,
4322
+ "learning_rate": 2.016192816301837e-05,
4323
+ "loss": 0.0046,
4324
+ "step": 589
4325
+ },
4326
+ {
4327
+ "epoch": 1.9364102564102565,
4328
+ "grad_norm": 0.002059696475043893,
4329
+ "learning_rate": 2.0133831895421322e-05,
4330
+ "loss": 0.0047,
4331
+ "step": 590
4332
+ },
4333
+ {
4334
+ "epoch": 1.9396923076923076,
4335
+ "grad_norm": 0.0020739359315484762,
4336
+ "learning_rate": 2.0108408941534486e-05,
4337
+ "loss": 0.0046,
4338
+ "step": 591
4339
+ },
4340
+ {
4341
+ "epoch": 1.942974358974359,
4342
+ "grad_norm": 0.0024034185335040092,
4343
+ "learning_rate": 2.00856600576328e-05,
4344
+ "loss": 0.0047,
4345
+ "step": 592
4346
+ },
4347
+ {
4348
+ "epoch": 1.9462564102564102,
4349
+ "grad_norm": 0.0022281610872596502,
4350
+ "learning_rate": 2.006558592044373e-05,
4351
+ "loss": 0.0048,
4352
+ "step": 593
4353
+ },
4354
+ {
4355
+ "epoch": 1.9495384615384617,
4356
+ "grad_norm": 0.0029593328945338726,
4357
+ "learning_rate": 2.0048187127127092e-05,
4358
+ "loss": 0.0049,
4359
+ "step": 594
4360
+ },
4361
+ {
4362
+ "epoch": 1.9528205128205127,
4363
+ "grad_norm": 0.002573527628555894,
4364
+ "learning_rate": 2.003346419525735e-05,
4365
+ "loss": 0.0048,
4366
+ "step": 595
4367
+ },
4368
+ {
4369
+ "epoch": 1.9561025641025642,
4370
+ "grad_norm": 0.002822197275236249,
4371
+ "learning_rate": 2.002141756280818e-05,
4372
+ "loss": 0.0047,
4373
+ "step": 596
4374
+ },
4375
+ {
4376
+ "epoch": 1.9593846153846153,
4377
+ "grad_norm": 0.002600959734991193,
4378
+ "learning_rate": 2.001204758813944e-05,
4379
+ "loss": 0.0047,
4380
+ "step": 597
4381
+ },
4382
+ {
4383
+ "epoch": 1.9626666666666668,
4384
+ "grad_norm": 0.003187810303643346,
4385
+ "learning_rate": 2.0005354549986523e-05,
4386
+ "loss": 0.0047,
4387
+ "step": 598
4388
+ },
4389
+ {
4390
+ "epoch": 1.9659487179487178,
4391
+ "grad_norm": 0.0029263379983603954,
4392
+ "learning_rate": 2.0001338647452058e-05,
4393
+ "loss": 0.0048,
4394
+ "step": 599
4395
+ },
4396
+ {
4397
+ "epoch": 1.9692307692307693,
4398
+ "grad_norm": 0.003195718163624406,
4399
+ "learning_rate": 2e-05,
4400
+ "loss": 0.0048,
4401
+ "step": 600
4402
+ },
4403
+ {
4404
+ "epoch": 1.9692307692307693,
4405
+ "eval_loss": 0.004978457931429148,
4406
+ "eval_runtime": 1.0723,
4407
+ "eval_samples_per_second": 46.628,
4408
+ "eval_steps_per_second": 46.628,
4409
+ "step": 600
4410
  }
4411
  ],
4412
  "logging_steps": 1,
 
4421
  "early_stopping_threshold": 0.0
4422
  },
4423
  "attributes": {
4424
+ "early_stopping_patience_counter": 3
4425
  }
4426
  },
4427
  "TrainerControl": {
 
4430
  "should_evaluate": false,
4431
  "should_log": false,
4432
  "should_save": true,
4433
+ "should_training_stop": true
4434
  },
4435
  "attributes": {}
4436
  }
4437
  },
4438
+ "total_flos": 3.654552359691878e+16,
4439
  "train_batch_size": 1,
4440
  "trial_name": null,
4441
  "trial_params": null