Fanucci commited on
Commit
be1feb1
·
verified ·
1 Parent(s): 551ce9e

Training in progress, step 4140, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab8ff796d5a0aa9544b801ac8d000c5cf5a70ba6cb8b4bfa68d15836f013ec94
3
  size 201892112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5cadbe51d559db96e86eef9f534a0bee2e0ea99aba510e0a28855ed1d0bc70a
3
  size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57231fda362f945f0352fb4a397d80ba901f8ed5b3852b72e464229f69083c39
3
  size 102864868
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:006b347290e03dad49fc36373c65ac91bbd8fd020705f94522a4e04e3bff7b95
3
  size 102864868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b664f6b5d5ee8fdd05af1164b71dfb90e4707b01450078266786504ef12d0e3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b68d5a1b9faff73cbbe06449508758f11cf04de1e43e5b3a13b45cefeff99b5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7b212df8169f0614cd9da7e49bb8587466d43a3f9ec4a30c90bea47b05a20c1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6dd0b5405eeec1dc6f96e40b1a34ef593661c41170d56934a252e76e7889804
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.6455243229866028,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-4100",
4
- "epoch": 0.44519850695622665,
5
  "eval_steps": 100,
6
- "global_step": 4100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -29043,6 +29043,286 @@
29043
  "eval_samples_per_second": 25.811,
29044
  "eval_steps_per_second": 6.453,
29045
  "step": 4100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29046
  }
29047
  ],
29048
  "logging_steps": 1,
@@ -29066,12 +29346,12 @@
29066
  "should_evaluate": false,
29067
  "should_log": false,
29068
  "should_save": true,
29069
- "should_training_stop": false
29070
  },
29071
  "attributes": {}
29072
  }
29073
  },
29074
- "total_flos": 1.805389684939948e+18,
29075
  "train_batch_size": 4,
29076
  "trial_name": null,
29077
  "trial_params": null
 
1
  {
2
  "best_metric": 0.6455243229866028,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-4100",
4
+ "epoch": 0.4495419070240923,
5
  "eval_steps": 100,
6
+ "global_step": 4140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
29043
  "eval_samples_per_second": 25.811,
29044
  "eval_steps_per_second": 6.453,
29045
  "step": 4100
29046
+ },
29047
+ {
29048
+ "epoch": 0.4453070919579233,
29049
+ "grad_norm": 0.3129846751689911,
29050
+ "learning_rate": 4.4001452062236715e-08,
29051
+ "loss": 0.6301,
29052
+ "step": 4101
29053
+ },
29054
+ {
29055
+ "epoch": 0.44541567695962,
29056
+ "grad_norm": 0.3126542270183563,
29057
+ "learning_rate": 4.1774051735721244e-08,
29058
+ "loss": 0.6597,
29059
+ "step": 4102
29060
+ },
29061
+ {
29062
+ "epoch": 0.4455242619613166,
29063
+ "grad_norm": 0.3281303942203522,
29064
+ "learning_rate": 3.9604490060962674e-08,
29065
+ "loss": 0.7234,
29066
+ "step": 4103
29067
+ },
29068
+ {
29069
+ "epoch": 0.44563284696301325,
29070
+ "grad_norm": 0.28728532791137695,
29071
+ "learning_rate": 3.749276829333459e-08,
29072
+ "loss": 0.5878,
29073
+ "step": 4104
29074
+ },
29075
+ {
29076
+ "epoch": 0.4457414319647099,
29077
+ "grad_norm": 0.3254416882991791,
29078
+ "learning_rate": 3.5438887654737355e-08,
29079
+ "loss": 0.7285,
29080
+ "step": 4105
29081
+ },
29082
+ {
29083
+ "epoch": 0.4458500169664065,
29084
+ "grad_norm": 0.33132821321487427,
29085
+ "learning_rate": 3.344284933360919e-08,
29086
+ "loss": 0.7421,
29087
+ "step": 4106
29088
+ },
29089
+ {
29090
+ "epoch": 0.44595860196810316,
29091
+ "grad_norm": 0.32671990990638733,
29092
+ "learning_rate": 3.150465448490403e-08,
29093
+ "loss": 0.6265,
29094
+ "step": 4107
29095
+ },
29096
+ {
29097
+ "epoch": 0.4460671869697998,
29098
+ "grad_norm": 0.325366348028183,
29099
+ "learning_rate": 2.962430423011364e-08,
29100
+ "loss": 0.7648,
29101
+ "step": 4108
29102
+ },
29103
+ {
29104
+ "epoch": 0.44617577197149644,
29105
+ "grad_norm": 0.30909645557403564,
29106
+ "learning_rate": 2.7801799657278805e-08,
29107
+ "loss": 0.6806,
29108
+ "step": 4109
29109
+ },
29110
+ {
29111
+ "epoch": 0.4462843569731931,
29112
+ "grad_norm": 0.3081817924976349,
29113
+ "learning_rate": 2.6037141820933753e-08,
29114
+ "loss": 0.6483,
29115
+ "step": 4110
29116
+ },
29117
+ {
29118
+ "epoch": 0.4463929419748897,
29119
+ "grad_norm": 0.28678232431411743,
29120
+ "learning_rate": 2.4330331742172806e-08,
29121
+ "loss": 0.53,
29122
+ "step": 4111
29123
+ },
29124
+ {
29125
+ "epoch": 0.44650152697658635,
29126
+ "grad_norm": 0.29590025544166565,
29127
+ "learning_rate": 2.268137040859486e-08,
29128
+ "loss": 0.6223,
29129
+ "step": 4112
29130
+ },
29131
+ {
29132
+ "epoch": 0.446610111978283,
29133
+ "grad_norm": 0.3081272542476654,
29134
+ "learning_rate": 2.109025877433668e-08,
29135
+ "loss": 0.6641,
29136
+ "step": 4113
29137
+ },
29138
+ {
29139
+ "epoch": 0.4467186969799796,
29140
+ "grad_norm": 0.3248719573020935,
29141
+ "learning_rate": 1.955699776006181e-08,
29142
+ "loss": 0.7767,
29143
+ "step": 4114
29144
+ },
29145
+ {
29146
+ "epoch": 0.44682728198167626,
29147
+ "grad_norm": 0.312339186668396,
29148
+ "learning_rate": 1.808158825297168e-08,
29149
+ "loss": 0.6629,
29150
+ "step": 4115
29151
+ },
29152
+ {
29153
+ "epoch": 0.4469358669833729,
29154
+ "grad_norm": 0.3206634819507599,
29155
+ "learning_rate": 1.666403110676118e-08,
29156
+ "loss": 0.649,
29157
+ "step": 4116
29158
+ },
29159
+ {
29160
+ "epoch": 0.44704445198506954,
29161
+ "grad_norm": 0.30358952283859253,
29162
+ "learning_rate": 1.530432714167418e-08,
29163
+ "loss": 0.6306,
29164
+ "step": 4117
29165
+ },
29166
+ {
29167
+ "epoch": 0.44715303698676623,
29168
+ "grad_norm": 0.2956608831882477,
29169
+ "learning_rate": 1.4002477144470227e-08,
29170
+ "loss": 0.5967,
29171
+ "step": 4118
29172
+ },
29173
+ {
29174
+ "epoch": 0.44726162198846287,
29175
+ "grad_norm": 0.31782612204551697,
29176
+ "learning_rate": 1.275848186845785e-08,
29177
+ "loss": 0.6721,
29178
+ "step": 4119
29179
+ },
29180
+ {
29181
+ "epoch": 0.4473702069901595,
29182
+ "grad_norm": 0.28622129559516907,
29183
+ "learning_rate": 1.1572342033416839e-08,
29184
+ "loss": 0.6106,
29185
+ "step": 4120
29186
+ },
29187
+ {
29188
+ "epoch": 0.44747879199185614,
29189
+ "grad_norm": 0.3039482533931732,
29190
+ "learning_rate": 1.044405832569817e-08,
29191
+ "loss": 0.7213,
29192
+ "step": 4121
29193
+ },
29194
+ {
29195
+ "epoch": 0.4475873769935528,
29196
+ "grad_norm": 0.3242916464805603,
29197
+ "learning_rate": 9.373631398157389e-09,
29198
+ "loss": 0.7128,
29199
+ "step": 4122
29200
+ },
29201
+ {
29202
+ "epoch": 0.4476959619952494,
29203
+ "grad_norm": 0.30805718898773193,
29204
+ "learning_rate": 8.361061870176823e-09,
29205
+ "loss": 0.5978,
29206
+ "step": 4123
29207
+ },
29208
+ {
29209
+ "epoch": 0.44780454699694605,
29210
+ "grad_norm": 0.3264826536178589,
29211
+ "learning_rate": 7.4063503276544655e-09,
29212
+ "loss": 0.709,
29213
+ "step": 4124
29214
+ },
29215
+ {
29216
+ "epoch": 0.4479131319986427,
29217
+ "grad_norm": 0.3391072452068329,
29218
+ "learning_rate": 6.50949732301509e-09,
29219
+ "loss": 0.712,
29220
+ "step": 4125
29221
+ },
29222
+ {
29223
+ "epoch": 0.4480217170003393,
29224
+ "grad_norm": 0.3170183598995209,
29225
+ "learning_rate": 5.670503375188041e-09,
29226
+ "loss": 0.6444,
29227
+ "step": 4126
29228
+ },
29229
+ {
29230
+ "epoch": 0.44813030200203596,
29231
+ "grad_norm": 0.32671859860420227,
29232
+ "learning_rate": 4.889368969662744e-09,
29233
+ "loss": 0.7363,
29234
+ "step": 4127
29235
+ },
29236
+ {
29237
+ "epoch": 0.4482388870037326,
29238
+ "grad_norm": 0.34119758009910583,
29239
+ "learning_rate": 4.1660945584109936e-09,
29240
+ "loss": 0.6435,
29241
+ "step": 4128
29242
+ },
29243
+ {
29244
+ "epoch": 0.44834747200542924,
29245
+ "grad_norm": 0.3383527100086212,
29246
+ "learning_rate": 3.5006805599424596e-09,
29247
+ "loss": 0.7669,
29248
+ "step": 4129
29249
+ },
29250
+ {
29251
+ "epoch": 0.4484560570071259,
29252
+ "grad_norm": 0.2880455553531647,
29253
+ "learning_rate": 2.893127359282488e-09,
29254
+ "loss": 0.6149,
29255
+ "step": 4130
29256
+ },
29257
+ {
29258
+ "epoch": 0.4485646420088225,
29259
+ "grad_norm": 0.3154943585395813,
29260
+ "learning_rate": 2.3434353079831996e-09,
29261
+ "loss": 0.701,
29262
+ "step": 4131
29263
+ },
29264
+ {
29265
+ "epoch": 0.44867322701051915,
29266
+ "grad_norm": 0.3077964782714844,
29267
+ "learning_rate": 1.851604724112388e-09,
29268
+ "loss": 0.6714,
29269
+ "step": 4132
29270
+ },
29271
+ {
29272
+ "epoch": 0.4487818120122158,
29273
+ "grad_norm": 0.3311520218849182,
29274
+ "learning_rate": 1.4176358922535216e-09,
29275
+ "loss": 0.7025,
29276
+ "step": 4133
29277
+ },
29278
+ {
29279
+ "epoch": 0.4488903970139125,
29280
+ "grad_norm": 0.3134852945804596,
29281
+ "learning_rate": 1.041529063516844e-09,
29282
+ "loss": 0.6822,
29283
+ "step": 4134
29284
+ },
29285
+ {
29286
+ "epoch": 0.4489989820156091,
29287
+ "grad_norm": 0.2992844581604004,
29288
+ "learning_rate": 7.232844555282725e-10,
29289
+ "loss": 0.6657,
29290
+ "step": 4135
29291
+ },
29292
+ {
29293
+ "epoch": 0.44910756701730575,
29294
+ "grad_norm": 0.2949857711791992,
29295
+ "learning_rate": 4.629022524182958e-10,
29296
+ "loss": 0.6671,
29297
+ "step": 4136
29298
+ },
29299
+ {
29300
+ "epoch": 0.4492161520190024,
29301
+ "grad_norm": 0.3150671422481537,
29302
+ "learning_rate": 2.603826048774849e-10,
29303
+ "loss": 0.657,
29304
+ "step": 4137
29305
+ },
29306
+ {
29307
+ "epoch": 0.449324737020699,
29308
+ "grad_norm": 0.3099953234195709,
29309
+ "learning_rate": 1.1572563006767567e-10,
29310
+ "loss": 0.6787,
29311
+ "step": 4138
29312
+ },
29313
+ {
29314
+ "epoch": 0.44943332202239566,
29315
+ "grad_norm": 0.28630250692367554,
29316
+ "learning_rate": 2.8931411699684164e-11,
29317
+ "loss": 0.5811,
29318
+ "step": 4139
29319
+ },
29320
+ {
29321
+ "epoch": 0.4495419070240923,
29322
+ "grad_norm": 0.31658029556274414,
29323
+ "learning_rate": 0.0,
29324
+ "loss": 0.6864,
29325
+ "step": 4140
29326
  }
29327
  ],
29328
  "logging_steps": 1,
 
29346
  "should_evaluate": false,
29347
  "should_log": false,
29348
  "should_save": true,
29349
+ "should_training_stop": true
29350
  },
29351
  "attributes": {}
29352
  }
29353
  },
29354
+ "total_flos": 1.822881497330221e+18,
29355
  "train_batch_size": 4,
29356
  "trial_name": null,
29357
  "trial_params": null