magatex commited on
Commit
f84a09c
·
verified ·
1 Parent(s): 6a733fe

Training in progress, step 592, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:784a3ff3fc84f3309bb405e9f91f112e3a61d4a0cb9f2c6e822bd6a878858326
3
  size 60010048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:727ef6499c3b2029862b640bd0783bcd2d1d77402e26d95bca288a369396ffdf
3
  size 60010048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91ba8047dcee23b14a1ea4b0abce7249941fe17ab3f28a1b96219ca15f6dd511
3
  size 120213058
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36f40407089333f17b49c25519b1a1948c002a6cf29f0dd3e129ac1c5424f9b5
3
  size 120213058
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d3c6e0bc4d6caf36d14b922d7bf99043ffb0fc6496f31b9e5e063d07fde6e06
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da699b9d89f916da840476c4663b34c3317d4235c857b22c694b924a7e8d83f6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ea1fdb86a115854271995d284a3ce89e28b576b20e5293e4b93fa4b47fd3d21
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e426754c87d665fc6a368f5f0109f66999a2f562e7ade96fcf16cbb316f70e2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.493194580078125,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-550",
4
- "epoch": 1.1920081273281409,
5
  "eval_steps": 25,
6
- "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4041,6 +4041,308 @@
4041
  "eval_samples_per_second": 26.671,
4042
  "eval_steps_per_second": 26.671,
4043
  "step": 550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4044
  }
4045
  ],
4046
  "logging_steps": 1,
@@ -4055,7 +4357,7 @@
4055
  "early_stopping_threshold": 0.0
4056
  },
4057
  "attributes": {
4058
- "early_stopping_patience_counter": 0
4059
  }
4060
  },
4061
  "TrainerControl": {
@@ -4064,12 +4366,12 @@
4064
  "should_evaluate": false,
4065
  "should_log": false,
4066
  "should_save": true,
4067
- "should_training_stop": false
4068
  },
4069
  "attributes": {}
4070
  }
4071
  },
4072
- "total_flos": 1.8051015272143258e+17,
4073
  "train_batch_size": 1,
4074
  "trial_name": null,
4075
  "trial_params": null
 
1
  {
2
  "best_metric": 1.493194580078125,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-550",
4
+ "epoch": 1.2830342025059263,
5
  "eval_steps": 25,
6
+ "global_step": 592,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4041
  "eval_samples_per_second": 26.671,
4042
  "eval_steps_per_second": 26.671,
4043
  "step": 550
4044
+ },
4045
+ {
4046
+ "epoch": 1.1941754148323738,
4047
+ "grad_norm": 0.745163083076477,
4048
+ "learning_rate": 2.2296148993937625e-05,
4049
+ "loss": 1.5242,
4050
+ "step": 551
4051
+ },
4052
+ {
4053
+ "epoch": 1.196342702336607,
4054
+ "grad_norm": 0.7508656978607178,
4055
+ "learning_rate": 2.2185957798587907e-05,
4056
+ "loss": 1.4407,
4057
+ "step": 552
4058
+ },
4059
+ {
4060
+ "epoch": 1.1985099898408398,
4061
+ "grad_norm": 0.7356210350990295,
4062
+ "learning_rate": 2.20784435397923e-05,
4063
+ "loss": 1.1741,
4064
+ "step": 553
4065
+ },
4066
+ {
4067
+ "epoch": 1.200677277345073,
4068
+ "grad_norm": 0.817733883857727,
4069
+ "learning_rate": 2.1973609495031733e-05,
4070
+ "loss": 1.2159,
4071
+ "step": 554
4072
+ },
4073
+ {
4074
+ "epoch": 1.2028445648493058,
4075
+ "grad_norm": 0.7189146876335144,
4076
+ "learning_rate": 2.18714588600831e-05,
4077
+ "loss": 0.9351,
4078
+ "step": 555
4079
+ },
4080
+ {
4081
+ "epoch": 1.2050118523535387,
4082
+ "grad_norm": 0.7796960473060608,
4083
+ "learning_rate": 2.177199474892181e-05,
4084
+ "loss": 1.0885,
4085
+ "step": 556
4086
+ },
4087
+ {
4088
+ "epoch": 1.2071791398577718,
4089
+ "grad_norm": 0.8709949851036072,
4090
+ "learning_rate": 2.1675220193626897e-05,
4091
+ "loss": 1.0783,
4092
+ "step": 557
4093
+ },
4094
+ {
4095
+ "epoch": 1.2093464273620047,
4096
+ "grad_norm": 0.8464658856391907,
4097
+ "learning_rate": 2.1581138144288525e-05,
4098
+ "loss": 0.9859,
4099
+ "step": 558
4100
+ },
4101
+ {
4102
+ "epoch": 1.2115137148662378,
4103
+ "grad_norm": 0.9255772829055786,
4104
+ "learning_rate": 2.148975146891817e-05,
4105
+ "loss": 0.94,
4106
+ "step": 559
4107
+ },
4108
+ {
4109
+ "epoch": 1.2136810023704707,
4110
+ "grad_norm": 0.9168251156806946,
4111
+ "learning_rate": 2.140106295336103e-05,
4112
+ "loss": 0.6344,
4113
+ "step": 560
4114
+ },
4115
+ {
4116
+ "epoch": 1.2158482898747036,
4117
+ "grad_norm": 0.9316290020942688,
4118
+ "learning_rate": 2.1315075301211272e-05,
4119
+ "loss": 0.6167,
4120
+ "step": 561
4121
+ },
4122
+ {
4123
+ "epoch": 1.2180155773789367,
4124
+ "grad_norm": 0.5710924863815308,
4125
+ "learning_rate": 2.123179113372946e-05,
4126
+ "loss": 1.06,
4127
+ "step": 562
4128
+ },
4129
+ {
4130
+ "epoch": 1.2201828648831696,
4131
+ "grad_norm": 0.4053063690662384,
4132
+ "learning_rate": 2.1151212989762815e-05,
4133
+ "loss": 2.0825,
4134
+ "step": 563
4135
+ },
4136
+ {
4137
+ "epoch": 1.2223501523874027,
4138
+ "grad_norm": 0.3882395923137665,
4139
+ "learning_rate": 2.1073343325667632e-05,
4140
+ "loss": 1.9561,
4141
+ "step": 564
4142
+ },
4143
+ {
4144
+ "epoch": 1.2245174398916356,
4145
+ "grad_norm": 0.40940290689468384,
4146
+ "learning_rate": 2.0998184515234558e-05,
4147
+ "loss": 1.8549,
4148
+ "step": 565
4149
+ },
4150
+ {
4151
+ "epoch": 1.2266847273958685,
4152
+ "grad_norm": 0.4803662896156311,
4153
+ "learning_rate": 2.0925738849616136e-05,
4154
+ "loss": 1.9017,
4155
+ "step": 566
4156
+ },
4157
+ {
4158
+ "epoch": 1.2288520149001017,
4159
+ "grad_norm": 0.4790099561214447,
4160
+ "learning_rate": 2.0856008537257003e-05,
4161
+ "loss": 1.8168,
4162
+ "step": 567
4163
+ },
4164
+ {
4165
+ "epoch": 1.2310193024043345,
4166
+ "grad_norm": 0.4733673334121704,
4167
+ "learning_rate": 2.078899570382657e-05,
4168
+ "loss": 1.9583,
4169
+ "step": 568
4170
+ },
4171
+ {
4172
+ "epoch": 1.2331865899085677,
4173
+ "grad_norm": 0.48890408873558044,
4174
+ "learning_rate": 2.0724702392154168e-05,
4175
+ "loss": 1.9115,
4176
+ "step": 569
4177
+ },
4178
+ {
4179
+ "epoch": 1.2353538774128006,
4180
+ "grad_norm": 0.4990405738353729,
4181
+ "learning_rate": 2.0663130562166867e-05,
4182
+ "loss": 1.6618,
4183
+ "step": 570
4184
+ },
4185
+ {
4186
+ "epoch": 1.2375211649170335,
4187
+ "grad_norm": 0.5173394083976746,
4188
+ "learning_rate": 2.0604282090829626e-05,
4189
+ "loss": 1.62,
4190
+ "step": 571
4191
+ },
4192
+ {
4193
+ "epoch": 1.2396884524212666,
4194
+ "grad_norm": 0.47291800379753113,
4195
+ "learning_rate": 2.0548158772088183e-05,
4196
+ "loss": 1.6012,
4197
+ "step": 572
4198
+ },
4199
+ {
4200
+ "epoch": 1.2418557399254995,
4201
+ "grad_norm": 0.5543546676635742,
4202
+ "learning_rate": 2.0494762316814265e-05,
4203
+ "loss": 2.2266,
4204
+ "step": 573
4205
+ },
4206
+ {
4207
+ "epoch": 1.2440230274297326,
4208
+ "grad_norm": 0.5711101293563843,
4209
+ "learning_rate": 2.044409435275349e-05,
4210
+ "loss": 2.1061,
4211
+ "step": 574
4212
+ },
4213
+ {
4214
+ "epoch": 1.2461903149339655,
4215
+ "grad_norm": 0.5323336720466614,
4216
+ "learning_rate": 2.0396156424475758e-05,
4217
+ "loss": 1.7618,
4218
+ "step": 575
4219
+ },
4220
+ {
4221
+ "epoch": 1.2461903149339655,
4222
+ "eval_loss": 1.4941222667694092,
4223
+ "eval_runtime": 1.9256,
4224
+ "eval_samples_per_second": 25.965,
4225
+ "eval_steps_per_second": 25.965,
4226
+ "step": 575
4227
+ },
4228
+ {
4229
+ "epoch": 1.2483576024381984,
4230
+ "grad_norm": 0.5417742729187012,
4231
+ "learning_rate": 2.035094999332813e-05,
4232
+ "loss": 1.9826,
4233
+ "step": 576
4234
+ },
4235
+ {
4236
+ "epoch": 1.2505248899424315,
4237
+ "grad_norm": 0.5711143016815186,
4238
+ "learning_rate": 2.0308476437390292e-05,
4239
+ "loss": 2.1701,
4240
+ "step": 577
4241
+ },
4242
+ {
4243
+ "epoch": 1.2526921774466644,
4244
+ "grad_norm": 0.5723181962966919,
4245
+ "learning_rate": 2.0268737051432534e-05,
4246
+ "loss": 1.8376,
4247
+ "step": 578
4248
+ },
4249
+ {
4250
+ "epoch": 1.2548594649508975,
4251
+ "grad_norm": 0.6304724216461182,
4252
+ "learning_rate": 2.0231733046876336e-05,
4253
+ "loss": 2.2109,
4254
+ "step": 579
4255
+ },
4256
+ {
4257
+ "epoch": 1.2570267524551304,
4258
+ "grad_norm": 0.49579569697380066,
4259
+ "learning_rate": 2.019746555175737e-05,
4260
+ "loss": 1.8671,
4261
+ "step": 580
4262
+ },
4263
+ {
4264
+ "epoch": 1.2591940399593633,
4265
+ "grad_norm": 0.5718152523040771,
4266
+ "learning_rate": 2.0165935610691136e-05,
4267
+ "loss": 2.0108,
4268
+ "step": 581
4269
+ },
4270
+ {
4271
+ "epoch": 1.2613613274635964,
4272
+ "grad_norm": 0.5495701432228088,
4273
+ "learning_rate": 2.0137144184841137e-05,
4274
+ "loss": 1.7419,
4275
+ "step": 582
4276
+ },
4277
+ {
4278
+ "epoch": 1.2635286149678293,
4279
+ "grad_norm": 0.5827396512031555,
4280
+ "learning_rate": 2.0111092151889548e-05,
4281
+ "loss": 2.0878,
4282
+ "step": 583
4283
+ },
4284
+ {
4285
+ "epoch": 1.2656959024720624,
4286
+ "grad_norm": 0.5941032767295837,
4287
+ "learning_rate": 2.00877803060105e-05,
4288
+ "loss": 2.0261,
4289
+ "step": 584
4290
+ },
4291
+ {
4292
+ "epoch": 1.2678631899762953,
4293
+ "grad_norm": 0.6075966954231262,
4294
+ "learning_rate": 2.006720935784581e-05,
4295
+ "loss": 1.9424,
4296
+ "step": 585
4297
+ },
4298
+ {
4299
+ "epoch": 1.2700304774805282,
4300
+ "grad_norm": 0.6470325589179993,
4301
+ "learning_rate": 2.0049379934483398e-05,
4302
+ "loss": 2.0216,
4303
+ "step": 586
4304
+ },
4305
+ {
4306
+ "epoch": 1.2721977649847613,
4307
+ "grad_norm": 0.5846126675605774,
4308
+ "learning_rate": 2.0034292579438092e-05,
4309
+ "loss": 1.9047,
4310
+ "step": 587
4311
+ },
4312
+ {
4313
+ "epoch": 1.2743650524889942,
4314
+ "grad_norm": 0.6144323945045471,
4315
+ "learning_rate": 2.00219477526351e-05,
4316
+ "loss": 1.897,
4317
+ "step": 588
4318
+ },
4319
+ {
4320
+ "epoch": 1.2765323399932273,
4321
+ "grad_norm": 0.5959208607673645,
4322
+ "learning_rate": 2.0012345830396012e-05,
4323
+ "loss": 1.7803,
4324
+ "step": 589
4325
+ },
4326
+ {
4327
+ "epoch": 1.2786996274974602,
4328
+ "grad_norm": 0.6328864097595215,
4329
+ "learning_rate": 2.0005487105427258e-05,
4330
+ "loss": 1.8311,
4331
+ "step": 590
4332
+ },
4333
+ {
4334
+ "epoch": 1.2808669150016931,
4335
+ "grad_norm": 0.6484797596931458,
4336
+ "learning_rate": 2.0001371786811258e-05,
4337
+ "loss": 1.9282,
4338
+ "step": 591
4339
+ },
4340
+ {
4341
+ "epoch": 1.2830342025059263,
4342
+ "grad_norm": 0.6196519136428833,
4343
+ "learning_rate": 2e-05,
4344
+ "loss": 1.7637,
4345
+ "step": 592
4346
  }
4347
  ],
4348
  "logging_steps": 1,
 
4357
  "early_stopping_threshold": 0.0
4358
  },
4359
  "attributes": {
4360
+ "early_stopping_patience_counter": 1
4361
  }
4362
  },
4363
  "TrainerControl": {
 
4366
  "should_evaluate": false,
4367
  "should_log": false,
4368
  "should_save": true,
4369
+ "should_training_stop": true
4370
  },
4371
  "attributes": {}
4372
  }
4373
  },
4374
+ "total_flos": 1.943692451244933e+17,
4375
  "train_batch_size": 1,
4376
  "trial_name": null,
4377
  "trial_params": null