irishprancer commited on
Commit
1d8d0c4
·
verified ·
1 Parent(s): de3b67b

Training in progress, step 4800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4eb3a768f899ce9ce0631c58f809010c9c777e340003d975f6a1973ee4faba9
3
  size 1482788592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94188bdc9e08dbb6eac6a0507320b0c907fb915cfa7001c5f92af6578ad90e47
3
  size 1482788592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d74dbeaa57407dc86512e5460fd84bfde29373f8895a653a470b4d9faeb9dbab
3
  size 2897966842
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcdf704d4a8495afba1bae183db657000ffb8f2ac49ae7b81fb67acdebdac75e
3
  size 2897966842
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3dc9adcef27fb98366cd75be28cfa4b8a34abd675a94257bf37bd35ce6d381d9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7c85ac926ab9f43b148938a210afdb9bfb0e214997d909b11c6243231f842eb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:931d38342b692c160fcb90f4d9acb8e6f6634f499984cdd5b99b0563194d400a
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:716d7ef0d2def98440e32b2cba336f73e613b85c0427aef8f0c8a6789d61bd46
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.581129789352417,
3
- "best_model_checkpoint": "./output/checkpoint-4500",
4
- "epoch": 0.12089854921740939,
5
  "eval_steps": 150,
6
- "global_step": 4650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3510,6 +3510,119 @@
3510
  "eval_samples_per_second": 11.093,
3511
  "eval_steps_per_second": 11.093,
3512
  "step": 4650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3513
  }
3514
  ],
3515
  "logging_steps": 10,
@@ -3529,7 +3642,7 @@
3529
  "attributes": {}
3530
  }
3531
  },
3532
- "total_flos": 6.365746439287603e+17,
3533
  "train_batch_size": 8,
3534
  "trial_name": null,
3535
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.580827534198761,
3
+ "best_model_checkpoint": "./output/checkpoint-4800",
4
+ "epoch": 0.12479850241797098,
5
  "eval_steps": 150,
6
+ "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3510
  "eval_samples_per_second": 11.093,
3511
  "eval_steps_per_second": 11.093,
3512
  "step": 4650
3513
+ },
3514
+ {
3515
+ "epoch": 0.12115854609744683,
3516
+ "grad_norm": 4.750567436218262,
3517
+ "learning_rate": 2.645879041572891e-07,
3518
+ "loss": 0.4758,
3519
+ "step": 4660
3520
+ },
3521
+ {
3522
+ "epoch": 0.12141854297748426,
3523
+ "grad_norm": 9.726584434509277,
3524
+ "learning_rate": 2.4931004862810295e-07,
3525
+ "loss": 0.475,
3526
+ "step": 4670
3527
+ },
3528
+ {
3529
+ "epoch": 0.12167853985752171,
3530
+ "grad_norm": 5.911032676696777,
3531
+ "learning_rate": 2.3448152492167586e-07,
3532
+ "loss": 0.5412,
3533
+ "step": 4680
3534
+ },
3535
+ {
3536
+ "epoch": 0.12193853673755915,
3537
+ "grad_norm": 4.567657947540283,
3538
+ "learning_rate": 2.201029425805393e-07,
3539
+ "loss": 0.6175,
3540
+ "step": 4690
3541
+ },
3542
+ {
3543
+ "epoch": 0.12219853361759658,
3544
+ "grad_norm": 4.639158725738525,
3545
+ "learning_rate": 2.061748926518972e-07,
3546
+ "loss": 0.5388,
3547
+ "step": 4700
3548
+ },
3549
+ {
3550
+ "epoch": 0.12245853049763403,
3551
+ "grad_norm": 7.211503982543945,
3552
+ "learning_rate": 1.9269794766333073e-07,
3553
+ "loss": 0.4986,
3554
+ "step": 4710
3555
+ },
3556
+ {
3557
+ "epoch": 0.12271852737767147,
3558
+ "grad_norm": 5.461792945861816,
3559
+ "learning_rate": 1.7967266159925864e-07,
3560
+ "loss": 0.4924,
3561
+ "step": 4720
3562
+ },
3563
+ {
3564
+ "epoch": 0.1229785242577089,
3565
+ "grad_norm": 4.597755432128906,
3566
+ "learning_rate": 1.670995698781777e-07,
3567
+ "loss": 0.5227,
3568
+ "step": 4730
3569
+ },
3570
+ {
3571
+ "epoch": 0.12323852113774635,
3572
+ "grad_norm": 4.296584606170654,
3573
+ "learning_rate": 1.549791893306424e-07,
3574
+ "loss": 0.4637,
3575
+ "step": 4740
3576
+ },
3577
+ {
3578
+ "epoch": 0.12349851801778379,
3579
+ "grad_norm": 5.01540994644165,
3580
+ "learning_rate": 1.4331201817802332e-07,
3581
+ "loss": 0.5115,
3582
+ "step": 4750
3583
+ },
3584
+ {
3585
+ "epoch": 0.12375851489782122,
3586
+ "grad_norm": 6.2236528396606445,
3587
+ "learning_rate": 1.320985360120322e-07,
3588
+ "loss": 0.4769,
3589
+ "step": 4760
3590
+ },
3591
+ {
3592
+ "epoch": 0.12401851177785866,
3593
+ "grad_norm": 6.63411283493042,
3594
+ "learning_rate": 1.2133920377499848e-07,
3595
+ "loss": 0.5284,
3596
+ "step": 4770
3597
+ },
3598
+ {
3599
+ "epoch": 0.12427850865789611,
3600
+ "grad_norm": 6.101823329925537,
3601
+ "learning_rate": 1.1103446374092981e-07,
3602
+ "loss": 0.5553,
3603
+ "step": 4780
3604
+ },
3605
+ {
3606
+ "epoch": 0.12453850553793354,
3607
+ "grad_norm": 5.5220441818237305,
3608
+ "learning_rate": 1.0118473949732765e-07,
3609
+ "loss": 0.5592,
3610
+ "step": 4790
3611
+ },
3612
+ {
3613
+ "epoch": 0.12479850241797098,
3614
+ "grad_norm": 5.166587829589844,
3615
+ "learning_rate": 9.179043592777716e-08,
3616
+ "loss": 0.4284,
3617
+ "step": 4800
3618
+ },
3619
+ {
3620
+ "epoch": 0.12479850241797098,
3621
+ "eval_loss": 0.580827534198761,
3622
+ "eval_runtime": 47.7849,
3623
+ "eval_samples_per_second": 10.464,
3624
+ "eval_steps_per_second": 10.464,
3625
+ "step": 4800
3626
  }
3627
  ],
3628
  "logging_steps": 10,
 
3642
  "attributes": {}
3643
  }
3644
  },
3645
+ "total_flos": 6.567131782486426e+17,
3646
  "train_batch_size": 8,
3647
  "trial_name": null,
3648
  "trial_params": null