error577 committed on
Commit
e393328
·
verified ·
1 Parent(s): 50a5d52

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ad721f183047deb9f9941632532d95dd5c6f94a75258458114ad606e6c79588
3
  size 590925768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d148803dec3242bacfa6090b788cc33df772e93b875035cf7e3a3d8c47dc0693
3
  size 590925768
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba070a7b4f0eda9b4e553807c2914aafea5dfe03e7fed45faf09722646313126
3
  size 301533378
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a243d4609f2848bbcd91d030f48333676cc98c340050529214e8a4c5cbc451
3
  size 301533378
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7ddc28fcb0d59d0c03a7ff796860f75248a656670e1261d481e7debe016a6dc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5871d01553a694652bee01b474f7aa31386bfbdbca60584c13070b444a88c461
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ef5f6fa421e52c5f015f38dbc8da890b8b41a4a78c203dae039e2fccfcbcb95
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0541d0446a58575a8e1bf0f51453829134e69b4b4f483226a260165a57705a1c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.5511385202407837,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-550",
4
- "epoch": 0.06160739288714646,
5
  "eval_steps": 50,
6
- "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3953,6 +3953,364 @@
3953
  "eval_samples_per_second": 9.607,
3954
  "eval_steps_per_second": 9.607,
3955
  "step": 550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3956
  }
3957
  ],
3958
  "logging_steps": 1,
@@ -3976,12 +4334,12 @@
3976
  "should_evaluate": false,
3977
  "should_log": false,
3978
  "should_save": true,
3979
- "should_training_stop": false
3980
  },
3981
  "attributes": {}
3982
  }
3983
  },
3984
- "total_flos": 5.065030154531635e+16,
3985
  "train_batch_size": 1,
3986
  "trial_name": null,
3987
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.5509313941001892,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-600",
4
+ "epoch": 0.06720806496779613,
5
  "eval_steps": 50,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3953
  "eval_samples_per_second": 9.607,
3954
  "eval_steps_per_second": 9.607,
3955
  "step": 550
3956
+ },
3957
+ {
3958
+ "epoch": 0.06171940632875945,
3959
+ "grad_norm": 0.12340470403432846,
3960
+ "learning_rate": 5.07672955698109e-06,
3961
+ "loss": 0.529,
3962
+ "step": 551
3963
+ },
3964
+ {
3965
+ "epoch": 0.061831419770372445,
3966
+ "grad_norm": 0.16562983393669128,
3967
+ "learning_rate": 4.872748404735644e-06,
3968
+ "loss": 0.4595,
3969
+ "step": 552
3970
+ },
3971
+ {
3972
+ "epoch": 0.061943433211985435,
3973
+ "grad_norm": 0.15129025280475616,
3974
+ "learning_rate": 4.6728820045062954e-06,
3975
+ "loss": 0.6089,
3976
+ "step": 553
3977
+ },
3978
+ {
3979
+ "epoch": 0.06205544665359843,
3980
+ "grad_norm": 0.1321287453174591,
3981
+ "learning_rate": 4.477136023048727e-06,
3982
+ "loss": 0.5442,
3983
+ "step": 554
3984
+ },
3985
+ {
3986
+ "epoch": 0.06216746009521142,
3987
+ "grad_norm": 0.1615147441625595,
3988
+ "learning_rate": 4.285516010293522e-06,
3989
+ "loss": 0.5495,
3990
+ "step": 555
3991
+ },
3992
+ {
3993
+ "epoch": 0.06227947353682442,
3994
+ "grad_norm": 0.1276487559080124,
3995
+ "learning_rate": 4.098027399188802e-06,
3996
+ "loss": 0.4491,
3997
+ "step": 556
3998
+ },
3999
+ {
4000
+ "epoch": 0.06239148697843741,
4001
+ "grad_norm": 0.1305229514837265,
4002
+ "learning_rate": 3.914675505546277e-06,
4003
+ "loss": 0.63,
4004
+ "step": 557
4005
+ },
4006
+ {
4007
+ "epoch": 0.0625035004200504,
4008
+ "grad_norm": 0.13546405732631683,
4009
+ "learning_rate": 3.735465527890458e-06,
4010
+ "loss": 0.6798,
4011
+ "step": 558
4012
+ },
4013
+ {
4014
+ "epoch": 0.0626155138616634,
4015
+ "grad_norm": 0.13185566663742065,
4016
+ "learning_rate": 3.560402547311275e-06,
4017
+ "loss": 0.575,
4018
+ "step": 559
4019
+ },
4020
+ {
4021
+ "epoch": 0.0627275273032764,
4022
+ "grad_norm": 0.13408592343330383,
4023
+ "learning_rate": 3.3894915273199987e-06,
4024
+ "loss": 0.723,
4025
+ "step": 560
4026
+ },
4027
+ {
4028
+ "epoch": 0.06283954074488939,
4029
+ "grad_norm": 0.11883525550365448,
4030
+ "learning_rate": 3.2227373137085954e-06,
4031
+ "loss": 0.4747,
4032
+ "step": 561
4033
+ },
4034
+ {
4035
+ "epoch": 0.06295155418650238,
4036
+ "grad_norm": 0.14109228551387787,
4037
+ "learning_rate": 3.0601446344122095e-06,
4038
+ "loss": 0.4895,
4039
+ "step": 562
4040
+ },
4041
+ {
4042
+ "epoch": 0.06306356762811538,
4043
+ "grad_norm": 0.14065487682819366,
4044
+ "learning_rate": 2.9017180993752e-06,
4045
+ "loss": 0.7272,
4046
+ "step": 563
4047
+ },
4048
+ {
4049
+ "epoch": 0.06317558106972837,
4050
+ "grad_norm": 0.13580210506916046,
4051
+ "learning_rate": 2.7474622004204304e-06,
4052
+ "loss": 0.5744,
4053
+ "step": 564
4054
+ },
4055
+ {
4056
+ "epoch": 0.06328759451134136,
4057
+ "grad_norm": 0.13952845335006714,
4058
+ "learning_rate": 2.5973813111218546e-06,
4059
+ "loss": 0.3957,
4060
+ "step": 565
4061
+ },
4062
+ {
4063
+ "epoch": 0.06339960795295435,
4064
+ "grad_norm": 0.1433325558900833,
4065
+ "learning_rate": 2.4514796866805964e-06,
4066
+ "loss": 0.4962,
4067
+ "step": 566
4068
+ },
4069
+ {
4070
+ "epoch": 0.06351162139456734,
4071
+ "grad_norm": 0.14881394803524017,
4072
+ "learning_rate": 2.3097614638042493e-06,
4073
+ "loss": 0.5508,
4074
+ "step": 567
4075
+ },
4076
+ {
4077
+ "epoch": 0.06362363483618035,
4078
+ "grad_norm": 0.13152483105659485,
4079
+ "learning_rate": 2.1722306605896022e-06,
4080
+ "loss": 0.5332,
4081
+ "step": 568
4082
+ },
4083
+ {
4084
+ "epoch": 0.06373564827779334,
4085
+ "grad_norm": 0.15379270911216736,
4086
+ "learning_rate": 2.0388911764086966e-06,
4087
+ "loss": 0.5214,
4088
+ "step": 569
4089
+ },
4090
+ {
4091
+ "epoch": 0.06384766171940633,
4092
+ "grad_norm": 0.14026005566120148,
4093
+ "learning_rate": 1.909746791798317e-06,
4094
+ "loss": 0.473,
4095
+ "step": 570
4096
+ },
4097
+ {
4098
+ "epoch": 0.06395967516101932,
4099
+ "grad_norm": 0.14658816158771515,
4100
+ "learning_rate": 1.7848011683527562e-06,
4101
+ "loss": 0.467,
4102
+ "step": 571
4103
+ },
4104
+ {
4105
+ "epoch": 0.06407168860263232,
4106
+ "grad_norm": 0.14597094058990479,
4107
+ "learning_rate": 1.6640578486200373e-06,
4108
+ "loss": 0.5426,
4109
+ "step": 572
4110
+ },
4111
+ {
4112
+ "epoch": 0.06418370204424531,
4113
+ "grad_norm": 0.14672952890396118,
4114
+ "learning_rate": 1.5475202560014054e-06,
4115
+ "loss": 0.623,
4116
+ "step": 573
4117
+ },
4118
+ {
4119
+ "epoch": 0.0642957154858583,
4120
+ "grad_norm": 0.1516023725271225,
4121
+ "learning_rate": 1.435191694654375e-06,
4122
+ "loss": 0.6358,
4123
+ "step": 574
4124
+ },
4125
+ {
4126
+ "epoch": 0.06440772892747129,
4127
+ "grad_norm": 0.13252241909503937,
4128
+ "learning_rate": 1.3270753493989373e-06,
4129
+ "loss": 0.4679,
4130
+ "step": 575
4131
+ },
4132
+ {
4133
+ "epoch": 0.0645197423690843,
4134
+ "grad_norm": 0.14411140978336334,
4135
+ "learning_rate": 1.2231742856273151e-06,
4136
+ "loss": 0.5655,
4137
+ "step": 576
4138
+ },
4139
+ {
4140
+ "epoch": 0.06463175581069729,
4141
+ "grad_norm": 0.15002818405628204,
4142
+ "learning_rate": 1.1234914492170678e-06,
4143
+ "loss": 0.5183,
4144
+ "step": 577
4145
+ },
4146
+ {
4147
+ "epoch": 0.06474376925231028,
4148
+ "grad_norm": 0.15125377476215363,
4149
+ "learning_rate": 1.0280296664475218e-06,
4150
+ "loss": 0.5556,
4151
+ "step": 578
4152
+ },
4153
+ {
4154
+ "epoch": 0.06485578269392327,
4155
+ "grad_norm": 0.153385192155838,
4156
+ "learning_rate": 9.367916439196709e-07,
4157
+ "loss": 0.4701,
4158
+ "step": 579
4159
+ },
4160
+ {
4161
+ "epoch": 0.06496779613553627,
4162
+ "grad_norm": 0.1459246724843979,
4163
+ "learning_rate": 8.49779968479436e-07,
4164
+ "loss": 0.5861,
4165
+ "step": 580
4166
+ },
4167
+ {
4168
+ "epoch": 0.06507980957714926,
4169
+ "grad_norm": 0.14465294778347015,
4170
+ "learning_rate": 7.669971071442738e-07,
4171
+ "loss": 0.581,
4172
+ "step": 581
4173
+ },
4174
+ {
4175
+ "epoch": 0.06519182301876225,
4176
+ "grad_norm": 0.14619335532188416,
4177
+ "learning_rate": 6.884454070333168e-07,
4178
+ "loss": 0.5257,
4179
+ "step": 582
4180
+ },
4181
+ {
4182
+ "epoch": 0.06530383646037524,
4183
+ "grad_norm": 0.13438697159290314,
4184
+ "learning_rate": 6.141270953007593e-07,
4185
+ "loss": 0.4356,
4186
+ "step": 583
4187
+ },
4188
+ {
4189
+ "epoch": 0.06541584990198823,
4190
+ "grad_norm": 0.1581222116947174,
4191
+ "learning_rate": 5.440442790727085e-07,
4192
+ "loss": 0.5663,
4193
+ "step": 584
4194
+ },
4195
+ {
4196
+ "epoch": 0.06552786334360124,
4197
+ "grad_norm": 0.15837247669696808,
4198
+ "learning_rate": 4.781989453874813e-07,
4199
+ "loss": 0.5957,
4200
+ "step": 585
4201
+ },
4202
+ {
4203
+ "epoch": 0.06563987678521423,
4204
+ "grad_norm": 0.14343412220478058,
4205
+ "learning_rate": 4.1659296113925046e-07,
4206
+ "loss": 0.4383,
4207
+ "step": 586
4208
+ },
4209
+ {
4210
+ "epoch": 0.06575189022682722,
4211
+ "grad_norm": 0.15165679156780243,
4212
+ "learning_rate": 3.592280730250863e-07,
4213
+ "loss": 0.5884,
4214
+ "step": 587
4215
+ },
4216
+ {
4217
+ "epoch": 0.06586390366844021,
4218
+ "grad_norm": 0.1490369737148285,
4219
+ "learning_rate": 3.06105907495513e-07,
4220
+ "loss": 0.4845,
4221
+ "step": 588
4222
+ },
4223
+ {
4224
+ "epoch": 0.06597591711005321,
4225
+ "grad_norm": 0.15211187303066254,
4226
+ "learning_rate": 2.572279707082625e-07,
4227
+ "loss": 0.5914,
4228
+ "step": 589
4229
+ },
4230
+ {
4231
+ "epoch": 0.0660879305516662,
4232
+ "grad_norm": 0.1439492255449295,
4233
+ "learning_rate": 2.125956484857083e-07,
4234
+ "loss": 0.506,
4235
+ "step": 590
4236
+ },
4237
+ {
4238
+ "epoch": 0.06619994399327919,
4239
+ "grad_norm": 0.15834854543209076,
4240
+ "learning_rate": 1.7221020627548043e-07,
4241
+ "loss": 0.6013,
4242
+ "step": 591
4243
+ },
4244
+ {
4245
+ "epoch": 0.06631195743489218,
4246
+ "grad_norm": 0.15449443459510803,
4247
+ "learning_rate": 1.3607278911462739e-07,
4248
+ "loss": 0.4803,
4249
+ "step": 592
4250
+ },
4251
+ {
4252
+ "epoch": 0.06642397087650519,
4253
+ "grad_norm": 0.16052158176898956,
4254
+ "learning_rate": 1.0418442159715879e-07,
4255
+ "loss": 0.4994,
4256
+ "step": 593
4257
+ },
4258
+ {
4259
+ "epoch": 0.06653598431811818,
4260
+ "grad_norm": 0.15226219594478607,
4261
+ "learning_rate": 7.654600784493536e-08,
4262
+ "loss": 0.4476,
4263
+ "step": 594
4264
+ },
4265
+ {
4266
+ "epoch": 0.06664799775973117,
4267
+ "grad_norm": 0.17426903545856476,
4268
+ "learning_rate": 5.315833148210602e-08,
4269
+ "loss": 0.5283,
4270
+ "step": 595
4271
+ },
4272
+ {
4273
+ "epoch": 0.06676001120134416,
4274
+ "grad_norm": 0.16931919753551483,
4275
+ "learning_rate": 3.402205561289229e-08,
4276
+ "loss": 0.5116,
4277
+ "step": 596
4278
+ },
4279
+ {
4280
+ "epoch": 0.06687202464295715,
4281
+ "grad_norm": 0.1650620847940445,
4282
+ "learning_rate": 1.9137722802686772e-08,
4283
+ "loss": 0.4657,
4284
+ "step": 597
4285
+ },
4286
+ {
4287
+ "epoch": 0.06698403808457015,
4288
+ "grad_norm": 0.1828121691942215,
4289
+ "learning_rate": 8.50575506278206e-09,
4290
+ "loss": 0.4424,
4291
+ "step": 598
4292
+ },
4293
+ {
4294
+ "epoch": 0.06709605152618314,
4295
+ "grad_norm": 0.19233034551143646,
4296
+ "learning_rate": 2.126453838380282e-09,
4297
+ "loss": 0.5529,
4298
+ "step": 599
4299
+ },
4300
+ {
4301
+ "epoch": 0.06720806496779613,
4302
+ "grad_norm": 0.24840380251407623,
4303
+ "learning_rate": 0.0,
4304
+ "loss": 0.5998,
4305
+ "step": 600
4306
+ },
4307
+ {
4308
+ "epoch": 0.06720806496779613,
4309
+ "eval_loss": 0.5509313941001892,
4310
+ "eval_runtime": 30.0076,
4311
+ "eval_samples_per_second": 9.564,
4312
+ "eval_steps_per_second": 9.564,
4313
+ "step": 600
4314
  }
4315
  ],
4316
  "logging_steps": 1,
 
4334
  "should_evaluate": false,
4335
  "should_log": false,
4336
  "should_save": true,
4337
+ "should_training_stop": true
4338
  },
4339
  "attributes": {}
4340
  }
4341
  },
4342
+ "total_flos": 5.535341585576755e+16,
4343
  "train_batch_size": 1,
4344
  "trial_name": null,
4345
  "trial_params": null