irishprancer commited on
Commit
268a8dd
·
verified ·
1 Parent(s): 5c851e6

Training in progress, step 3600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad88d566a2de22280de9f59b5c145992a9fd5fe8fe840eb9fb608784af8061f4
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fcde8fff671a9e3943a7206216f5aa93c2c1394ad15e27c488e3a5ce5334895
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1414fec916d0b25beb8d137f45e63474d39eeed8c60eddd52432b5ce01f3b4b6
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efd66e2f145ff8ba7f09e1ad5b11fdf963e4fd8ce95a14181fab94269e9fb8ca
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da7a2430b39e5e44cb64bbbf1f8e636e91b380491fea68813c5289acab7fa99d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44222b1bb3193020a7e558d8efc91533b7bf22b40de2edd049f9d11da894b760
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f94e1d8202f6c1b191d74521b1a79cc0e162b70d269a8fc663a163d38b719239
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3293218b6796a13a9f95a7300ab605072092402c0dbdc9fe7b53627646555830
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 150.0,
5
  "eval_steps": 150,
6
- "global_step": 3450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3342,6 +3342,151 @@
3342
  "EMA_steps_per_second": 24.892,
3343
  "epoch": 150.0,
3344
  "step": 3450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3345
  }
3346
  ],
3347
  "logging_steps": 10,
@@ -3361,7 +3506,7 @@
3361
  "attributes": {}
3362
  }
3363
  },
3364
- "total_flos": 8.89909420608553e+16,
3365
  "train_batch_size": 4,
3366
  "trial_name": null,
3367
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 156.52173913043478,
5
  "eval_steps": 150,
6
+ "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3342
  "EMA_steps_per_second": 24.892,
3343
  "epoch": 150.0,
3344
  "step": 3450
3345
+ },
3346
+ {
3347
+ "epoch": 150.43478260869566,
3348
+ "grad_norm": 1.7044988870620728,
3349
+ "learning_rate": 2.513945738151511e-07,
3350
+ "loss": 0.2135,
3351
+ "step": 3460
3352
+ },
3353
+ {
3354
+ "epoch": 150.8695652173913,
3355
+ "grad_norm": 2.001293897628784,
3356
+ "learning_rate": 5.027891476303022e-07,
3357
+ "loss": 0.2623,
3358
+ "step": 3470
3359
+ },
3360
+ {
3361
+ "epoch": 151.30434782608697,
3362
+ "grad_norm": 1.6400986909866333,
3363
+ "learning_rate": 7.541837214454532e-07,
3364
+ "loss": 0.1997,
3365
+ "step": 3480
3366
+ },
3367
+ {
3368
+ "epoch": 151.7391304347826,
3369
+ "grad_norm": 2.337966203689575,
3370
+ "learning_rate": 1.0055782952606044e-06,
3371
+ "loss": 0.2472,
3372
+ "step": 3490
3373
+ },
3374
+ {
3375
+ "epoch": 152.17391304347825,
3376
+ "grad_norm": 2.081322431564331,
3377
+ "learning_rate": 1.2569728690757554e-06,
3378
+ "loss": 0.2426,
3379
+ "step": 3500
3380
+ },
3381
+ {
3382
+ "epoch": 152.6086956521739,
3383
+ "grad_norm": 1.6173598766326904,
3384
+ "learning_rate": 1.5083674428909064e-06,
3385
+ "loss": 0.2398,
3386
+ "step": 3510
3387
+ },
3388
+ {
3389
+ "epoch": 153.04347826086956,
3390
+ "grad_norm": 1.571141004562378,
3391
+ "learning_rate": 1.7597620167060574e-06,
3392
+ "loss": 0.2069,
3393
+ "step": 3520
3394
+ },
3395
+ {
3396
+ "epoch": 153.47826086956522,
3397
+ "grad_norm": 2.327928066253662,
3398
+ "learning_rate": 2.011156590521209e-06,
3399
+ "loss": 0.2502,
3400
+ "step": 3530
3401
+ },
3402
+ {
3403
+ "epoch": 153.91304347826087,
3404
+ "grad_norm": 2.673839807510376,
3405
+ "learning_rate": 2.2625511643363598e-06,
3406
+ "loss": 0.232,
3407
+ "step": 3540
3408
+ },
3409
+ {
3410
+ "epoch": 154.34782608695653,
3411
+ "grad_norm": 2.2869648933410645,
3412
+ "learning_rate": 2.5139457381515108e-06,
3413
+ "loss": 0.2399,
3414
+ "step": 3550
3415
+ },
3416
+ {
3417
+ "epoch": 154.7826086956522,
3418
+ "grad_norm": 2.043811798095703,
3419
+ "learning_rate": 2.5139454890395686e-06,
3420
+ "loss": 0.2345,
3421
+ "step": 3560
3422
+ },
3423
+ {
3424
+ "epoch": 155.2173913043478,
3425
+ "grad_norm": 1.682305932044983,
3426
+ "learning_rate": 2.51394474170384e-06,
3427
+ "loss": 0.1958,
3428
+ "step": 3570
3429
+ },
3430
+ {
3431
+ "epoch": 155.65217391304347,
3432
+ "grad_norm": 2.0729916095733643,
3433
+ "learning_rate": 2.5139434961446224e-06,
3434
+ "loss": 0.2663,
3435
+ "step": 3580
3436
+ },
3437
+ {
3438
+ "epoch": 156.08695652173913,
3439
+ "grad_norm": 1.6533286571502686,
3440
+ "learning_rate": 2.513941752362408e-06,
3441
+ "loss": 0.2031,
3442
+ "step": 3590
3443
+ },
3444
+ {
3445
+ "epoch": 156.52173913043478,
3446
+ "grad_norm": 2.51108980178833,
3447
+ "learning_rate": 2.5139395103578894e-06,
3448
+ "loss": 0.2679,
3449
+ "step": 3600
3450
+ },
3451
+ {
3452
+ "epoch": 156.52173913043478,
3453
+ "eval_loss": 0.9608185887336731,
3454
+ "eval_runtime": 0.4253,
3455
+ "eval_samples_per_second": 23.515,
3456
+ "eval_steps_per_second": 23.515,
3457
+ "step": 3600
3458
+ },
3459
+ {
3460
+ "Start_State_loss": 0.861186683177948,
3461
+ "Start_State_runtime": 0.413,
3462
+ "Start_State_samples_per_second": 24.215,
3463
+ "Start_State_steps_per_second": 24.215,
3464
+ "epoch": 156.52173913043478,
3465
+ "step": 3600
3466
+ },
3467
+ {
3468
+ "Raw_Model_loss": 0.9608185887336731,
3469
+ "Raw_Model_runtime": 0.419,
3470
+ "Raw_Model_samples_per_second": 23.864,
3471
+ "Raw_Model_steps_per_second": 23.864,
3472
+ "epoch": 156.52173913043478,
3473
+ "step": 3600
3474
+ },
3475
+ {
3476
+ "SWA_loss": 0.7903212308883667,
3477
+ "SWA_runtime": 0.4071,
3478
+ "SWA_samples_per_second": 24.562,
3479
+ "SWA_steps_per_second": 24.562,
3480
+ "epoch": 156.52173913043478,
3481
+ "step": 3600
3482
+ },
3483
+ {
3484
+ "EMA_loss": 0.8596304059028625,
3485
+ "EMA_runtime": 0.4003,
3486
+ "EMA_samples_per_second": 24.982,
3487
+ "EMA_steps_per_second": 24.982,
3488
+ "epoch": 156.52173913043478,
3489
+ "step": 3600
3490
  }
3491
  ],
3492
  "logging_steps": 10,
 
3506
  "attributes": {}
3507
  }
3508
  },
3509
+ "total_flos": 9.28760054861906e+16,
3510
  "train_batch_size": 4,
3511
  "trial_name": null,
3512
  "trial_params": null