irishprancer committed on
Commit
3000626
·
verified ·
1 Parent(s): dac7a12

Training in progress, step 4500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21408a8c0f804aa69ccdc098361ffb0c38ee5eae2c774310be1c3d5f3d0c89cb
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a5a6f82a282e6ee513038360740bfa6163feb4b1d4b1bb3319d6b0ef1f4751f
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bcaa1a30846eab83cb0fb8aeeb387ec463414d9ba20d28e1e9aa81c65bf4680
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a71d3e09cfa9bcbe2ed92701fe51af492bac444d4e688ae56a471982c181c9e9
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a9ea00016d252bf419fc0794eade190f54eb50118e2e0be5b9c332ef3c36fc0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802e223144d189244ce5a768642009b3c15e29f14e41b4808f514470d4c7be6e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e7ae917a4132ef2fbbbdadfebce9aa687102db21112c728e0ebfe527b807e8a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e092ded0b8103aeaa278f39556d48ebee944cc0f4cd6e8f95b6ba39b7752813
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 189.1304347826087,
5
  "eval_steps": 150,
6
- "global_step": 4350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4212,6 +4212,151 @@
4212
  "EMA_steps_per_second": 25.542,
4213
  "epoch": 189.1304347826087,
4214
  "step": 4350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4215
  }
4216
  ],
4217
  "logging_steps": 10,
@@ -4231,7 +4376,7 @@
4231
  "attributes": {}
4232
  }
4233
  },
4234
- "total_flos": 1.1202954752684851e+17,
4235
  "train_batch_size": 4,
4236
  "trial_name": null,
4237
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 195.65217391304347,
5
  "eval_steps": 150,
6
+ "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4212
  "EMA_steps_per_second": 25.542,
4213
  "epoch": 189.1304347826087,
4214
  "step": 4350
4215
+ },
4216
+ {
4217
+ "epoch": 189.56521739130434,
4218
+ "grad_norm": 1.9401793479919434,
4219
+ "learning_rate": 1.5299694155534387e-06,
4220
+ "loss": 0.2163,
4221
+ "step": 4360
4222
+ },
4223
+ {
4224
+ "epoch": 190.0,
4225
+ "grad_norm": 4.096744060516357,
4226
+ "learning_rate": 1.529967444651148e-06,
4227
+ "loss": 0.2344,
4228
+ "step": 4370
4229
+ },
4230
+ {
4231
+ "epoch": 190.43478260869566,
4232
+ "grad_norm": 2.7062318325042725,
4233
+ "learning_rate": 1.529965170535223e-06,
4234
+ "loss": 0.251,
4235
+ "step": 4380
4236
+ },
4237
+ {
4238
+ "epoch": 190.8695652173913,
4239
+ "grad_norm": 1.7941333055496216,
4240
+ "learning_rate": 1.5299625932065658e-06,
4241
+ "loss": 0.2192,
4242
+ "step": 4390
4243
+ },
4244
+ {
4245
+ "epoch": 191.30434782608697,
4246
+ "grad_norm": 2.2132506370544434,
4247
+ "learning_rate": 1.5299597126661977e-06,
4248
+ "loss": 0.2179,
4249
+ "step": 4400
4250
+ },
4251
+ {
4252
+ "epoch": 191.7391304347826,
4253
+ "grad_norm": 2.125366687774658,
4254
+ "learning_rate": 1.5299565289152606e-06,
4255
+ "loss": 0.2031,
4256
+ "step": 4410
4257
+ },
4258
+ {
4259
+ "epoch": 192.17391304347825,
4260
+ "grad_norm": 2.0995376110076904,
4261
+ "learning_rate": 1.5299530419550163e-06,
4262
+ "loss": 0.2472,
4263
+ "step": 4420
4264
+ },
4265
+ {
4266
+ "epoch": 192.6086956521739,
4267
+ "grad_norm": 2.151653289794922,
4268
+ "learning_rate": 1.529949251786847e-06,
4269
+ "loss": 0.2326,
4270
+ "step": 4430
4271
+ },
4272
+ {
4273
+ "epoch": 193.04347826086956,
4274
+ "grad_norm": 2.5126099586486816,
4275
+ "learning_rate": 1.5299451584122548e-06,
4276
+ "loss": 0.234,
4277
+ "step": 4440
4278
+ },
4279
+ {
4280
+ "epoch": 193.47826086956522,
4281
+ "grad_norm": 1.9897412061691284,
4282
+ "learning_rate": 1.5299407618328622e-06,
4283
+ "loss": 0.2401,
4284
+ "step": 4450
4285
+ },
4286
+ {
4287
+ "epoch": 193.91304347826087,
4288
+ "grad_norm": 2.143177032470703,
4289
+ "learning_rate": 1.5299360620504121e-06,
4290
+ "loss": 0.2344,
4291
+ "step": 4460
4292
+ },
4293
+ {
4294
+ "epoch": 194.34782608695653,
4295
+ "grad_norm": 2.5046348571777344,
4296
+ "learning_rate": 1.5299310590667677e-06,
4297
+ "loss": 0.2091,
4298
+ "step": 4470
4299
+ },
4300
+ {
4301
+ "epoch": 194.7826086956522,
4302
+ "grad_norm": 2.4033350944519043,
4303
+ "learning_rate": 1.529925752883911e-06,
4304
+ "loss": 0.2265,
4305
+ "step": 4480
4306
+ },
4307
+ {
4308
+ "epoch": 195.2173913043478,
4309
+ "grad_norm": 1.9208111763000488,
4310
+ "learning_rate": 1.529920143503946e-06,
4311
+ "loss": 0.2074,
4312
+ "step": 4490
4313
+ },
4314
+ {
4315
+ "epoch": 195.65217391304347,
4316
+ "grad_norm": 2.4695804119110107,
4317
+ "learning_rate": 1.5299142309290955e-06,
4318
+ "loss": 0.2067,
4319
+ "step": 4500
4320
+ },
4321
+ {
4322
+ "epoch": 195.65217391304347,
4323
+ "eval_loss": 0.9752073287963867,
4324
+ "eval_runtime": 0.4001,
4325
+ "eval_samples_per_second": 24.997,
4326
+ "eval_steps_per_second": 24.997,
4327
+ "step": 4500
4328
+ },
4329
+ {
4330
+ "Start_State_loss": 0.8609819412231445,
4331
+ "Start_State_runtime": 0.3943,
4332
+ "Start_State_samples_per_second": 25.364,
4333
+ "Start_State_steps_per_second": 25.364,
4334
+ "epoch": 195.65217391304347,
4335
+ "step": 4500
4336
+ },
4337
+ {
4338
+ "Raw_Model_loss": 0.9752073287963867,
4339
+ "Raw_Model_runtime": 0.3942,
4340
+ "Raw_Model_samples_per_second": 25.367,
4341
+ "Raw_Model_steps_per_second": 25.367,
4342
+ "epoch": 195.65217391304347,
4343
+ "step": 4500
4344
+ },
4345
+ {
4346
+ "SWA_loss": 0.8120683431625366,
4347
+ "SWA_runtime": 0.3958,
4348
+ "SWA_samples_per_second": 25.266,
4349
+ "SWA_steps_per_second": 25.266,
4350
+ "epoch": 195.65217391304347,
4351
+ "step": 4500
4352
+ },
4353
+ {
4354
+ "EMA_loss": 0.8598009943962097,
4355
+ "EMA_runtime": 0.3937,
4356
+ "EMA_samples_per_second": 25.401,
4357
+ "EMA_steps_per_second": 25.401,
4358
+ "epoch": 195.65217391304347,
4359
+ "step": 4500
4360
  }
4361
  ],
4362
  "logging_steps": 10,
 
4376
  "attributes": {}
4377
  }
4378
  },
4379
+ "total_flos": 1.1587388919393485e+17,
4380
  "train_batch_size": 4,
4381
  "trial_name": null,
4382
  "trial_params": null