Darayut commited on
Commit
77606b4
·
verified ·
1 Parent(s): 6a5c5ab

Training in progress, step 9000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ac9471fd5df30d3c5ea7544c71ea3e7cefb44f3fb9490b2909ef10388bce490
3
  size 14895064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7319cb689ba3e516dee8119d6166d02f60346a80b9407e1829e0eb7a9c7cf058
3
  size 14895064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d59192f8bbc2c6a84f768006dec09d697146bbe5c387df123b2916fcbd0ab943
3
  size 29912011
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28fc732bf1109f671d4c358012abdfc8822737d44a1596623d8cde2a2fb3947a
3
  size 29912011
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ab438fe94b3989ed8322d28570b903861ed0d770c25fa9ec9f46a8cab223787
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d37c363d72cc9de2f50d65bc99f818e3959d420932e0bc02d7eeef7adb2f100b
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9186fd1e64a1c1811e18ab635d1272734f47c9569eaa3cd8b4ec661120caca0d
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8e166b0e1532c50e06c9eed7d1cdc679b1c6021d721dee7e20d7bc2206cdce7
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77a6f2ecf1ccf7042296687a38ce67c7addcc2ead0ac8b4635abf563c9658325
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:014d66dbf58608e4fb1d45e02e80d4e24fcc03a53dcd66abd6565081b6009d7b
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8888888888888888,
6
  "eval_steps": 1000,
7
- "global_step": 8000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -640,6 +640,85 @@
640
  "eval_samples_per_second": 11.685,
641
  "eval_steps_per_second": 1.461,
642
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
643
  }
644
  ],
645
  "logging_steps": 100,
@@ -654,12 +733,12 @@
654
  "should_evaluate": false,
655
  "should_log": false,
656
  "should_save": true,
657
- "should_training_stop": false
658
  },
659
  "attributes": {}
660
  }
661
  },
662
- "total_flos": 1.122565267390464e+18,
663
  "train_batch_size": 8,
664
  "trial_name": null,
665
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0363333333333333,
6
  "eval_steps": 1000,
7
+ "global_step": 9000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
640
  "eval_samples_per_second": 11.685,
641
  "eval_steps_per_second": 1.461,
642
  "step": 8000
643
+ },
644
+ {
645
+ "epoch": 0.9,
646
+ "grad_norm": 16.134510040283203,
647
+ "learning_rate": 1.2388931252988811e-06,
648
+ "loss": 0.1359,
649
+ "step": 8100
650
+ },
651
+ {
652
+ "epoch": 0.9111111111111111,
653
+ "grad_norm": 0.4015595018863678,
654
+ "learning_rate": 9.812981544816224e-07,
655
+ "loss": 0.1457,
656
+ "step": 8200
657
+ },
658
+ {
659
+ "epoch": 0.9222222222222223,
660
+ "grad_norm": 0.4668665826320648,
661
+ "learning_rate": 7.530286434071432e-07,
662
+ "loss": 0.1207,
663
+ "step": 8300
664
+ },
665
+ {
666
+ "epoch": 0.9333333333333333,
667
+ "grad_norm": 0.8954722285270691,
668
+ "learning_rate": 5.544365671162189e-07,
669
+ "loss": 0.1214,
670
+ "step": 8400
671
+ },
672
+ {
673
+ "epoch": 0.9444444444444444,
674
+ "grad_norm": 3.937987804412842,
675
+ "learning_rate": 3.8582814019624714e-07,
676
+ "loss": 0.1254,
677
+ "step": 8500
678
+ },
679
+ {
680
+ "epoch": 0.9555555555555556,
681
+ "grad_norm": 0.34690240025520325,
682
+ "learning_rate": 2.474633446205554e-07,
683
+ "loss": 0.1347,
684
+ "step": 8600
685
+ },
686
+ {
687
+ "epoch": 1.003,
688
+ "grad_norm": 0.4924575388431549,
689
+ "learning_rate": 1.3955552887504387e-07,
690
+ "loss": 0.1372,
691
+ "step": 8700
692
+ },
693
+ {
694
+ "epoch": 1.0141111111111112,
695
+ "grad_norm": 5.0098772048950195,
696
+ "learning_rate": 6.227107899027696e-08,
697
+ "loss": 0.1612,
698
+ "step": 8800
699
+ },
700
+ {
701
+ "epoch": 1.0252222222222223,
702
+ "grad_norm": 0.7986459732055664,
703
+ "learning_rate": 1.572916198628649e-08,
704
+ "loss": 0.1477,
705
+ "step": 8900
706
+ },
707
+ {
708
+ "epoch": 1.0363333333333333,
709
+ "grad_norm": 0.36028048396110535,
710
+ "learning_rate": 1.5421256671643848e-12,
711
+ "loss": 0.1418,
712
+ "step": 9000
713
+ },
714
+ {
715
+ "epoch": 1.0363333333333333,
716
+ "eval_loss": 0.3109706938266754,
717
+ "eval_mean_iou": 0.3996240933787572,
718
+ "eval_runtime": 16.6041,
719
+ "eval_samples_per_second": 12.045,
720
+ "eval_steps_per_second": 1.506,
721
+ "step": 9000
722
  }
723
  ],
724
  "logging_steps": 100,
 
733
  "should_evaluate": false,
734
  "should_log": false,
735
  "should_save": true,
736
+ "should_training_stop": true
737
  },
738
  "attributes": {}
739
  }
740
  },
741
+ "total_flos": 1.2627280650735452e+18,
742
  "train_batch_size": 8,
743
  "trial_name": null,
744
  "trial_params": null