error577 commited on
Commit
08097d2
·
verified ·
1 Parent(s): 795781b

Training in progress, step 440, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c0b202fc0c96405179f88a6c2eaa4f6dc3272d743fbddfecad7984da6b3126f
3
  size 639691872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3adc18392d2f21276e1f3a5e51a07ca17b1146c2ef6b6e597fa70cbd35cb3bdb
3
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40a4057e0b09a78b12e194d176176ab3bdf1dba15e9b3d7cc076fde7fedd72a1
3
  size 1279647314
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a2cfd564c67e428d0d90d8802e1970d22aafbab1ec924f4f6c956cccc8140e5
3
  size 1279647314
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8989144fab65b9563db39df1e14d31e2d7e7e0d841ffc9302a3d61f93bd4035c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32e08d0a211b4d4a818fb65e4db7285f388cba9154617f34f859f6540a9421f1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e62bbb669ef85272e1fb4d893e6c7bdc8670a8e92466433aef00a4eb78b394c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:003818438c5b86338e0642cffcf6bb6b8eec0d2c9f4884b9d0b63566f2711618
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.3341846466064453,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-420",
4
- "epoch": 0.1330034636318654,
5
  "eval_steps": 20,
6
- "global_step": 420,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3123,6 +3123,154 @@
3123
  "eval_samples_per_second": 4.259,
3124
  "eval_steps_per_second": 4.259,
3125
  "step": 420
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3126
  }
3127
  ],
3128
  "logging_steps": 1,
@@ -3151,7 +3299,7 @@
3151
  "attributes": {}
3152
  }
3153
  },
3154
- "total_flos": 3.3441179726197555e+17,
3155
  "train_batch_size": 1,
3156
  "trial_name": null,
3157
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.3223698139190674,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-440",
4
+ "epoch": 0.1393369619000495,
5
  "eval_steps": 20,
6
+ "global_step": 440,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3123
  "eval_samples_per_second": 4.259,
3124
  "eval_steps_per_second": 4.259,
3125
  "step": 420
3126
+ },
3127
+ {
3128
+ "epoch": 0.1333201385452746,
3129
+ "grad_norm": 0.5786765813827515,
3130
+ "learning_rate": 0.00019942151943087426,
3131
+ "loss": 2.2436,
3132
+ "step": 421
3133
+ },
3134
+ {
3135
+ "epoch": 0.1336368134586838,
3136
+ "grad_norm": 0.5472723841667175,
3137
+ "learning_rate": 0.00019941791308263955,
3138
+ "loss": 2.211,
3139
+ "step": 422
3140
+ },
3141
+ {
3142
+ "epoch": 0.13395348837209303,
3143
+ "grad_norm": 0.5549196600914001,
3144
+ "learning_rate": 0.00019941429556082055,
3145
+ "loss": 2.3284,
3146
+ "step": 423
3147
+ },
3148
+ {
3149
+ "epoch": 0.13427016328550223,
3150
+ "grad_norm": 0.5404260754585266,
3151
+ "learning_rate": 0.00019941066686582394,
3152
+ "loss": 2.3089,
3153
+ "step": 424
3154
+ },
3155
+ {
3156
+ "epoch": 0.13458683819891143,
3157
+ "grad_norm": 0.622839093208313,
3158
+ "learning_rate": 0.00019940702699805743,
3159
+ "loss": 2.204,
3160
+ "step": 425
3161
+ },
3162
+ {
3163
+ "epoch": 0.13490351311232063,
3164
+ "grad_norm": 0.6209724545478821,
3165
+ "learning_rate": 0.00019940337595793017,
3166
+ "loss": 2.264,
3167
+ "step": 426
3168
+ },
3169
+ {
3170
+ "epoch": 0.13522018802572983,
3171
+ "grad_norm": 0.6113777756690979,
3172
+ "learning_rate": 0.00019939971374585252,
3173
+ "loss": 2.3617,
3174
+ "step": 427
3175
+ },
3176
+ {
3177
+ "epoch": 0.13553686293913905,
3178
+ "grad_norm": 0.591740608215332,
3179
+ "learning_rate": 0.000199396040362236,
3180
+ "loss": 2.2724,
3181
+ "step": 428
3182
+ },
3183
+ {
3184
+ "epoch": 0.13585353785254825,
3185
+ "grad_norm": 0.7021921277046204,
3186
+ "learning_rate": 0.00019939235580749353,
3187
+ "loss": 2.396,
3188
+ "step": 429
3189
+ },
3190
+ {
3191
+ "epoch": 0.13617021276595745,
3192
+ "grad_norm": 0.6321803331375122,
3193
+ "learning_rate": 0.00019938866008203918,
3194
+ "loss": 2.082,
3195
+ "step": 430
3196
+ },
3197
+ {
3198
+ "epoch": 0.13648688767936665,
3199
+ "grad_norm": 0.6896611452102661,
3200
+ "learning_rate": 0.00019938495318628832,
3201
+ "loss": 2.2072,
3202
+ "step": 431
3203
+ },
3204
+ {
3205
+ "epoch": 0.13680356259277585,
3206
+ "grad_norm": 0.670881986618042,
3207
+ "learning_rate": 0.00019938123512065758,
3208
+ "loss": 2.207,
3209
+ "step": 432
3210
+ },
3211
+ {
3212
+ "epoch": 0.13712023750618504,
3213
+ "grad_norm": 0.6715372204780579,
3214
+ "learning_rate": 0.00019937750588556484,
3215
+ "loss": 2.1882,
3216
+ "step": 433
3217
+ },
3218
+ {
3219
+ "epoch": 0.13743691241959427,
3220
+ "grad_norm": 0.7520514130592346,
3221
+ "learning_rate": 0.0001993737654814292,
3222
+ "loss": 2.3707,
3223
+ "step": 434
3224
+ },
3225
+ {
3226
+ "epoch": 0.13775358733300347,
3227
+ "grad_norm": 0.6770562529563904,
3228
+ "learning_rate": 0.00019937001390867105,
3229
+ "loss": 2.2745,
3230
+ "step": 435
3231
+ },
3232
+ {
3233
+ "epoch": 0.13807026224641267,
3234
+ "grad_norm": 0.7494112849235535,
3235
+ "learning_rate": 0.00019936625116771204,
3236
+ "loss": 2.2834,
3237
+ "step": 436
3238
+ },
3239
+ {
3240
+ "epoch": 0.13838693715982187,
3241
+ "grad_norm": 0.7565605044364929,
3242
+ "learning_rate": 0.0001993624772589751,
3243
+ "loss": 2.3396,
3244
+ "step": 437
3245
+ },
3246
+ {
3247
+ "epoch": 0.13870361207323106,
3248
+ "grad_norm": 0.8649305105209351,
3249
+ "learning_rate": 0.0001993586921828843,
3250
+ "loss": 2.3929,
3251
+ "step": 438
3252
+ },
3253
+ {
3254
+ "epoch": 0.1390202869866403,
3255
+ "grad_norm": 0.7562718391418457,
3256
+ "learning_rate": 0.0001993548959398651,
3257
+ "loss": 2.2906,
3258
+ "step": 439
3259
+ },
3260
+ {
3261
+ "epoch": 0.1393369619000495,
3262
+ "grad_norm": 0.8264215588569641,
3263
+ "learning_rate": 0.00019935108853034414,
3264
+ "loss": 2.4555,
3265
+ "step": 440
3266
+ },
3267
+ {
3268
+ "epoch": 0.1393369619000495,
3269
+ "eval_loss": 2.3223698139190674,
3270
+ "eval_runtime": 119.2317,
3271
+ "eval_samples_per_second": 4.261,
3272
+ "eval_steps_per_second": 4.261,
3273
+ "step": 440
3274
  }
3275
  ],
3276
  "logging_steps": 1,
 
3299
  "attributes": {}
3300
  }
3301
  },
3302
+ "total_flos": 3.47716755004588e+17,
3303
  "train_batch_size": 1,
3304
  "trial_name": null,
3305
  "trial_params": null