irishprancer committed on
Commit
6afe06f
·
verified ·
1 Parent(s): 5ded891

Training in progress, step 4650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a5a6f82a282e6ee513038360740bfa6163feb4b1d4b1bb3319d6b0ef1f4751f
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c2fa6d7c57abe80b81e686c5bb261331348a26463db489c0057496c21099267
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a71d3e09cfa9bcbe2ed92701fe51af492bac444d4e688ae56a471982c181c9e9
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:798f7293795a2261c83bf45e42597af80061bea9a6acf124906fd2023d47a1d7
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:802e223144d189244ce5a768642009b3c15e29f14e41b4808f514470d4c7be6e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b03ef6d89f6c10452a8bc84393b1dc225e370d174364da48043ff472b287411
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e092ded0b8103aeaa278f39556d48ebee944cc0f4cd6e8f95b6ba39b7752813
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f96e2618e1c452343a3740c32bea2247d1da5a3f0e229791fe5ed8f1e4e8eb3a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 195.65217391304347,
5
  "eval_steps": 150,
6
- "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4357,6 +4357,151 @@
4357
  "EMA_steps_per_second": 25.401,
4358
  "epoch": 195.65217391304347,
4359
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4360
  }
4361
  ],
4362
  "logging_steps": 10,
@@ -4376,7 +4521,7 @@
4376
  "attributes": {}
4377
  }
4378
  },
4379
- "total_flos": 1.1587388919393485e+17,
4380
  "train_batch_size": 4,
4381
  "trial_name": null,
4382
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 202.17391304347825,
5
  "eval_steps": 150,
6
+ "global_step": 4650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4357
  "EMA_steps_per_second": 25.401,
4358
  "epoch": 195.65217391304347,
4359
  "step": 4500
4360
+ },
4361
+ {
4362
+ "epoch": 196.08695652173913,
4363
+ "grad_norm": 2.102900981903076,
4364
+ "learning_rate": 1.5299080151617038e-06,
4365
+ "loss": 0.2645,
4366
+ "step": 4510
4367
+ },
4368
+ {
4369
+ "epoch": 196.52173913043478,
4370
+ "grad_norm": 2.1095998287200928,
4371
+ "learning_rate": 1.5299014962042341e-06,
4372
+ "loss": 0.2165,
4373
+ "step": 4520
4374
+ },
4375
+ {
4376
+ "epoch": 196.95652173913044,
4377
+ "grad_norm": 2.1897032260894775,
4378
+ "learning_rate": 1.5298946740592704e-06,
4379
+ "loss": 0.2351,
4380
+ "step": 4530
4381
+ },
4382
+ {
4383
+ "epoch": 197.3913043478261,
4384
+ "grad_norm": 1.8536592721939087,
4385
+ "learning_rate": 1.5298875487295168e-06,
4386
+ "loss": 0.2502,
4387
+ "step": 4540
4388
+ },
4389
+ {
4390
+ "epoch": 197.82608695652175,
4391
+ "grad_norm": 1.948095679283142,
4392
+ "learning_rate": 1.5298801202177976e-06,
4393
+ "loss": 0.2185,
4394
+ "step": 4550
4395
+ },
4396
+ {
4397
+ "epoch": 198.2608695652174,
4398
+ "grad_norm": 2.084287643432617,
4399
+ "learning_rate": 1.529872388527057e-06,
4400
+ "loss": 0.2297,
4401
+ "step": 4560
4402
+ },
4403
+ {
4404
+ "epoch": 198.69565217391303,
4405
+ "grad_norm": 1.5842596292495728,
4406
+ "learning_rate": 1.5298643536603602e-06,
4407
+ "loss": 0.2311,
4408
+ "step": 4570
4409
+ },
4410
+ {
4411
+ "epoch": 199.1304347826087,
4412
+ "grad_norm": 2.214616537094116,
4413
+ "learning_rate": 1.5298560156208912e-06,
4414
+ "loss": 0.1927,
4415
+ "step": 4580
4416
+ },
4417
+ {
4418
+ "epoch": 199.56521739130434,
4419
+ "grad_norm": 1.5991625785827637,
4420
+ "learning_rate": 1.5298473744119554e-06,
4421
+ "loss": 0.2538,
4422
+ "step": 4590
4423
+ },
4424
+ {
4425
+ "epoch": 200.0,
4426
+ "grad_norm": 3.232147455215454,
4427
+ "learning_rate": 1.5298384300369777e-06,
4428
+ "loss": 0.1878,
4429
+ "step": 4600
4430
+ },
4431
+ {
4432
+ "epoch": 200.43478260869566,
4433
+ "grad_norm": 1.8239914178848267,
4434
+ "learning_rate": 1.5298291824995035e-06,
4435
+ "loss": 0.2398,
4436
+ "step": 4610
4437
+ },
4438
+ {
4439
+ "epoch": 200.8695652173913,
4440
+ "grad_norm": 1.616271734237671,
4441
+ "learning_rate": 1.5298196318031983e-06,
4442
+ "loss": 0.2276,
4443
+ "step": 4620
4444
+ },
4445
+ {
4446
+ "epoch": 201.30434782608697,
4447
+ "grad_norm": 1.999419093132019,
4448
+ "learning_rate": 1.5298097779518473e-06,
4449
+ "loss": 0.2271,
4450
+ "step": 4630
4451
+ },
4452
+ {
4453
+ "epoch": 201.7391304347826,
4454
+ "grad_norm": 2.4747536182403564,
4455
+ "learning_rate": 1.5297996209493567e-06,
4456
+ "loss": 0.1894,
4457
+ "step": 4640
4458
+ },
4459
+ {
4460
+ "epoch": 202.17391304347825,
4461
+ "grad_norm": 1.546221137046814,
4462
+ "learning_rate": 1.5297891607997524e-06,
4463
+ "loss": 0.2233,
4464
+ "step": 4650
4465
+ },
4466
+ {
4467
+ "epoch": 202.17391304347825,
4468
+ "eval_loss": 0.9768635034561157,
4469
+ "eval_runtime": 0.3903,
4470
+ "eval_samples_per_second": 25.62,
4471
+ "eval_steps_per_second": 25.62,
4472
+ "step": 4650
4473
+ },
4474
+ {
4475
+ "Start_State_loss": 0.8609819412231445,
4476
+ "Start_State_runtime": 0.3895,
4477
+ "Start_State_samples_per_second": 25.672,
4478
+ "Start_State_steps_per_second": 25.672,
4479
+ "epoch": 202.17391304347825,
4480
+ "step": 4650
4481
+ },
4482
+ {
4483
+ "Raw_Model_loss": 0.9768635034561157,
4484
+ "Raw_Model_runtime": 0.3902,
4485
+ "Raw_Model_samples_per_second": 25.625,
4486
+ "Raw_Model_steps_per_second": 25.625,
4487
+ "epoch": 202.17391304347825,
4488
+ "step": 4650
4489
+ },
4490
+ {
4491
+ "SWA_loss": 0.8143197894096375,
4492
+ "SWA_runtime": 0.3924,
4493
+ "SWA_samples_per_second": 25.484,
4494
+ "SWA_steps_per_second": 25.484,
4495
+ "epoch": 202.17391304347825,
4496
+ "step": 4650
4497
+ },
4498
+ {
4499
+ "EMA_loss": 0.8591374158859253,
4500
+ "EMA_runtime": 0.3893,
4501
+ "EMA_samples_per_second": 25.688,
4502
+ "EMA_steps_per_second": 25.688,
4503
+ "epoch": 202.17391304347825,
4504
+ "step": 4650
4505
  }
4506
  ],
4507
  "logging_steps": 10,
 
4521
  "attributes": {}
4522
  }
4523
  },
4524
+ "total_flos": 1.1967994202384794e+17,
4525
  "train_batch_size": 4,
4526
  "trial_name": null,
4527
  "trial_params": null