FormlessAI commited on
Commit
bd7fe10
·
verified ·
1 Parent(s): cd63e3e

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:984e68604b6af75b411f5f11e88160ab14ffa793f8bd5c647df8bebecd3b18c0
3
  size 364930784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5910e0a282e4df32eef0552c8b3139c44b0817839e4759d05b0434f2de570164
3
  size 364930784
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78109ddd55b2deba0ced0e197023af2a86391881ee5edd68293eec75d7854127
3
  size 185530443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:985a31fdc1b7c2050bb3569366a09e87d28883fa8a2c2d07e33459da0df3290e
3
  size 185530443
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b842000329cb019d1d2edb7e10becc78690394c9823dbe2e83a8aa37ec27bc0b
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cede62ea1101cd7c1a2b08854972b212cc8c8285489e3cdb8d86af1f7b9e8d9b
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b81f32a8b1ffb9abe5759b7adf548cb057f3278522c4461efa97ab5f32eb6c06
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a96cad50eef2317d5b06d359661294bcf4e10472a2fe4aa3e2c96c25afaf8fe4
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19bd092966b887f951d3dc5d6ca48eca53fa0ae856668ccafe6b6441f719ca4e
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:653c374a59cc448606819acd79f0d50657fef2b2d01bddc9aff0cb92325491aa
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c493249489e6bc5240397221441de65fc413ddc008851f514ca031c390927c3
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b0ed1fb8354c06bf0eab3c9b2dbb6b716f1ca765c82ae4407881d1f78bd018a
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cca51c1849ad1ddc701e6aec02fafde7c902a86faef1a8a581fbe7052164735c
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dbed6f9227c6453885e3fcec169430d1d02615fe4f493d1e5b46420af58b713
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.5373325943946838,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.838929152492235,
6
  "eval_steps": 50,
7
- "global_step": 4800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -7496,6 +7496,240 @@
7496
  "eval_samples_per_second": 425.989,
7497
  "eval_steps_per_second": 13.324,
7498
  "step": 4800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7499
  }
7500
  ],
7501
  "logging_steps": 5,
@@ -7510,7 +7744,7 @@
7510
  "early_stopping_threshold": 0.0001
7511
  },
7512
  "attributes": {
7513
- "early_stopping_patience_counter": 0
7514
  }
7515
  },
7516
  "TrainerControl": {
@@ -7519,12 +7753,12 @@
7519
  "should_evaluate": false,
7520
  "should_log": false,
7521
  "should_save": true,
7522
- "should_training_stop": false
7523
  },
7524
  "attributes": {}
7525
  }
7526
  },
7527
- "total_flos": 3.7562856206877327e+18,
7528
  "train_batch_size": 8,
7529
  "trial_name": null,
7530
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.5373095273971558,
4
  "best_model_checkpoint": null,
5
+ "epoch": 2.9276734210915545,
6
  "eval_steps": 50,
7
+ "global_step": 4950,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
7496
  "eval_samples_per_second": 425.989,
7497
  "eval_steps_per_second": 13.324,
7498
  "step": 4800
7499
+ },
7500
+ {
7501
+ "epoch": 2.841887294778879,
7502
+ "grad_norm": 0.22534750401973724,
7503
+ "learning_rate": 1.6351597887982846e-06,
7504
+ "loss": 0.581,
7505
+ "step": 4805
7506
+ },
7507
+ {
7508
+ "epoch": 2.8448454370655227,
7509
+ "grad_norm": 0.23928098380565643,
7510
+ "learning_rate": 1.5529003378542404e-06,
7511
+ "loss": 0.5837,
7512
+ "step": 4810
7513
+ },
7514
+ {
7515
+ "epoch": 2.847803579352167,
7516
+ "grad_norm": 0.21647833287715912,
7517
+ "learning_rate": 1.4727559570263333e-06,
7518
+ "loss": 0.5701,
7519
+ "step": 4815
7520
+ },
7521
+ {
7522
+ "epoch": 2.850761721638811,
7523
+ "grad_norm": 0.2176506221294403,
7524
+ "learning_rate": 1.3947274699220398e-06,
7525
+ "loss": 0.5626,
7526
+ "step": 4820
7527
+ },
7528
+ {
7529
+ "epoch": 2.8537198639254546,
7530
+ "grad_norm": 0.21065934002399445,
7531
+ "learning_rate": 1.3188156784048088e-06,
7532
+ "loss": 0.5686,
7533
+ "step": 4825
7534
+ },
7535
+ {
7536
+ "epoch": 2.856678006212099,
7537
+ "grad_norm": 0.22182585299015045,
7538
+ "learning_rate": 1.2450213625857274e-06,
7539
+ "loss": 0.5761,
7540
+ "step": 4830
7541
+ },
7542
+ {
7543
+ "epoch": 2.8596361484987427,
7544
+ "grad_norm": 0.21298271417617798,
7545
+ "learning_rate": 1.1733452808156017e-06,
7546
+ "loss": 0.5867,
7547
+ "step": 4835
7548
+ },
7549
+ {
7550
+ "epoch": 2.8625942907853865,
7551
+ "grad_norm": 0.229048490524292,
7552
+ "learning_rate": 1.103788169677036e-06,
7553
+ "loss": 0.589,
7554
+ "step": 4840
7555
+ },
7556
+ {
7557
+ "epoch": 2.865552433072031,
7558
+ "grad_norm": 0.2213655412197113,
7559
+ "learning_rate": 1.0363507439769986e-06,
7560
+ "loss": 0.5597,
7561
+ "step": 4845
7562
+ },
7563
+ {
7564
+ "epoch": 2.8685105753586746,
7565
+ "grad_norm": 0.21822868287563324,
7566
+ "learning_rate": 9.7103369673936e-07,
7567
+ "loss": 0.5712,
7568
+ "step": 4850
7569
+ },
7570
+ {
7571
+ "epoch": 2.8685105753586746,
7572
+ "eval_loss": 0.5373578667640686,
7573
+ "eval_runtime": 15.1783,
7574
+ "eval_samples_per_second": 427.584,
7575
+ "eval_steps_per_second": 13.374,
7576
+ "step": 4850
7577
+ },
7578
+ {
7579
+ "epoch": 2.871468717645319,
7580
+ "grad_norm": 0.22016650438308716,
7581
+ "learning_rate": 9.078376991978266e-07,
7582
+ "loss": 0.5587,
7583
+ "step": 4855
7584
+ },
7585
+ {
7586
+ "epoch": 2.8744268599319627,
7587
+ "grad_norm": 0.23947712779045105,
7588
+ "learning_rate": 8.467634007890796e-07,
7589
+ "loss": 0.5841,
7590
+ "step": 4860
7591
+ },
7592
+ {
7593
+ "epoch": 2.877385002218607,
7594
+ "grad_norm": 0.2243824005126953,
7595
+ "learning_rate": 7.878114291460063e-07,
7596
+ "loss": 0.5736,
7597
+ "step": 4865
7598
+ },
7599
+ {
7600
+ "epoch": 2.8803431445052508,
7601
+ "grad_norm": 0.22133906185626984,
7602
+ "learning_rate": 7.309823900913461e-07,
7603
+ "loss": 0.5764,
7604
+ "step": 4870
7605
+ },
7606
+ {
7607
+ "epoch": 2.8833012867918946,
7608
+ "grad_norm": 0.21976634860038757,
7609
+ "learning_rate": 6.76276867631405e-07,
7610
+ "loss": 0.5699,
7611
+ "step": 4875
7612
+ },
7613
+ {
7614
+ "epoch": 2.886259429078539,
7615
+ "grad_norm": 0.22008314728736877,
7616
+ "learning_rate": 6.236954239500471e-07,
7617
+ "loss": 0.5527,
7618
+ "step": 4880
7619
+ },
7620
+ {
7621
+ "epoch": 2.8892175713651826,
7622
+ "grad_norm": 0.22807146608829498,
7623
+ "learning_rate": 5.732385994029618e-07,
7624
+ "loss": 0.5943,
7625
+ "step": 4885
7626
+ },
7627
+ {
7628
+ "epoch": 2.8921757136518265,
7629
+ "grad_norm": 0.22938776016235352,
7630
+ "learning_rate": 5.249069125121154e-07,
7631
+ "loss": 0.5825,
7632
+ "step": 4890
7633
+ },
7634
+ {
7635
+ "epoch": 2.8951338559384707,
7636
+ "grad_norm": 0.20941923558712006,
7637
+ "learning_rate": 4.787008599603642e-07,
7638
+ "loss": 0.5685,
7639
+ "step": 4895
7640
+ },
7641
+ {
7642
+ "epoch": 2.8980919982251145,
7643
+ "grad_norm": 0.22085338830947876,
7644
+ "learning_rate": 4.346209165863655e-07,
7645
+ "loss": 0.5588,
7646
+ "step": 4900
7647
+ },
7648
+ {
7649
+ "epoch": 2.8980919982251145,
7650
+ "eval_loss": 0.5373329520225525,
7651
+ "eval_runtime": 15.2559,
7652
+ "eval_samples_per_second": 425.409,
7653
+ "eval_steps_per_second": 13.306,
7654
+ "step": 4900
7655
+ },
7656
+ {
7657
+ "epoch": 2.9010501405117584,
7658
+ "grad_norm": 0.22424866259098053,
7659
+ "learning_rate": 3.926675353797443e-07,
7660
+ "loss": 0.5725,
7661
+ "step": 4905
7662
+ },
7663
+ {
7664
+ "epoch": 2.9040082827984026,
7665
+ "grad_norm": 0.2182874232530594,
7666
+ "learning_rate": 3.5284114747641856e-07,
7667
+ "loss": 0.5582,
7668
+ "step": 4910
7669
+ },
7670
+ {
7671
+ "epoch": 2.9069664250850464,
7672
+ "grad_norm": 0.21973784267902374,
7673
+ "learning_rate": 3.151421621541335e-07,
7674
+ "loss": 0.5684,
7675
+ "step": 4915
7676
+ },
7677
+ {
7678
+ "epoch": 2.9099245673716907,
7679
+ "grad_norm": 0.2083846479654312,
7680
+ "learning_rate": 2.795709668283172e-07,
7681
+ "loss": 0.578,
7682
+ "step": 4920
7683
+ },
7684
+ {
7685
+ "epoch": 2.9128827096583345,
7686
+ "grad_norm": 0.2196836769580841,
7687
+ "learning_rate": 2.4612792704798287e-07,
7688
+ "loss": 0.5603,
7689
+ "step": 4925
7690
+ },
7691
+ {
7692
+ "epoch": 2.9158408519449788,
7693
+ "grad_norm": 0.22254040837287903,
7694
+ "learning_rate": 2.1481338649216013e-07,
7695
+ "loss": 0.5526,
7696
+ "step": 4930
7697
+ },
7698
+ {
7699
+ "epoch": 2.9187989942316226,
7700
+ "grad_norm": 0.2200893610715866,
7701
+ "learning_rate": 1.8562766696618855e-07,
7702
+ "loss": 0.5661,
7703
+ "step": 4935
7704
+ },
7705
+ {
7706
+ "epoch": 2.9217571365182664,
7707
+ "grad_norm": 0.22102928161621094,
7708
+ "learning_rate": 1.5857106839847136e-07,
7709
+ "loss": 0.5905,
7710
+ "step": 4940
7711
+ },
7712
+ {
7713
+ "epoch": 2.9247152788049107,
7714
+ "grad_norm": 0.2244081199169159,
7715
+ "learning_rate": 1.3364386883745962e-07,
7716
+ "loss": 0.5743,
7717
+ "step": 4945
7718
+ },
7719
+ {
7720
+ "epoch": 2.9276734210915545,
7721
+ "grad_norm": 0.23028399050235748,
7722
+ "learning_rate": 1.1084632444868224e-07,
7723
+ "loss": 0.5852,
7724
+ "step": 4950
7725
+ },
7726
+ {
7727
+ "epoch": 2.9276734210915545,
7728
+ "eval_loss": 0.5373095273971558,
7729
+ "eval_runtime": 15.2077,
7730
+ "eval_samples_per_second": 426.758,
7731
+ "eval_steps_per_second": 13.349,
7732
+ "step": 4950
7733
  }
7734
  ],
7735
  "logging_steps": 5,
 
7744
  "early_stopping_threshold": 0.0001
7745
  },
7746
  "attributes": {
7747
+ "early_stopping_patience_counter": 3
7748
  }
7749
  },
7750
  "TrainerControl": {
 
7753
  "should_evaluate": false,
7754
  "should_log": false,
7755
  "should_save": true,
7756
+ "should_training_stop": true
7757
  },
7758
  "attributes": {}
7759
  }
7760
  },
7761
+ "total_flos": 3.873445665417724e+18,
7762
  "train_batch_size": 8,
7763
  "trial_name": null,
7764
  "trial_params": null