irishprancer commited on
Commit
61f7de6
·
verified ·
1 Parent(s): 94a8ccc

Training in progress, step 1200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffadfcecebb0222aa110b4c04dc7dcf449445cb1257ad373d2c573864d4946f4
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a6dd8eb15d303eb3b236d8f68cc9d45fec3651e65ec39b9d552d49c0ad3e89e
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48049032ce30e690a0e20e1c74a40a3d0d491ae37296b800adf28f948ac25c62
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bdb1caf162c588fbb79a826fe5e343b59d9db2b4bcedf59ac4a5cc0d94edc2e
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5033c3d16bccb36ffaebb5ce29ad27ffe0f183d458fcf132e776197413917bfa
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfd83aa19eaa65ffd8facfc7a6e6b5ccb62a4255eff28971986e07fccd1c0b48
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15c1d156a1458cb6f00d5682720ac01d76d87fde2ae92aa38769a7cbc234762e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87220f7564c527fa546ba53e49f1fc40170b9568e84927eecccc6abfeef8f191
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7167766094207764,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 45.65217391304348,
5
  "eval_steps": 150,
6
- "global_step": 1050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -822,6 +822,143 @@
822
  "eval_samples_per_second": 25.331,
823
  "eval_steps_per_second": 25.331,
824
  "step": 1050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
825
  }
826
  ],
827
  "logging_steps": 10,
@@ -841,7 +978,7 @@
841
  "attributes": {}
842
  }
843
  },
844
- "total_flos": 2.696323352857805e+16,
845
  "train_batch_size": 4,
846
  "trial_name": null,
847
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7167766094207764,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 52.17391304347826,
5
  "eval_steps": 150,
6
+ "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
822
  "eval_samples_per_second": 25.331,
823
  "eval_steps_per_second": 25.331,
824
  "step": 1050
825
+ },
826
+ {
827
+ "epoch": 46.08695652173913,
828
+ "grad_norm": 1.6549588441848755,
829
+ "learning_rate": 1.498658941680816e-06,
830
+ "loss": 0.4324,
831
+ "step": 1060
832
+ },
833
+ {
834
+ "epoch": 46.52173913043478,
835
+ "grad_norm": 1.7663071155548096,
836
+ "learning_rate": 2.997317883361632e-06,
837
+ "loss": 0.4069,
838
+ "step": 1070
839
+ },
840
+ {
841
+ "epoch": 46.95652173913044,
842
+ "grad_norm": 1.6513171195983887,
843
+ "learning_rate": 4.495976825042448e-06,
844
+ "loss": 0.3544,
845
+ "step": 1080
846
+ },
847
+ {
848
+ "epoch": 47.391304347826086,
849
+ "grad_norm": 1.8116092681884766,
850
+ "learning_rate": 5.994635766723264e-06,
851
+ "loss": 0.3649,
852
+ "step": 1090
853
+ },
854
+ {
855
+ "epoch": 47.82608695652174,
856
+ "grad_norm": 1.367565631866455,
857
+ "learning_rate": 7.49329470840408e-06,
858
+ "loss": 0.3867,
859
+ "step": 1100
860
+ },
861
+ {
862
+ "epoch": 48.26086956521739,
863
+ "grad_norm": 1.97525155544281,
864
+ "learning_rate": 8.991953650084896e-06,
865
+ "loss": 0.4065,
866
+ "step": 1110
867
+ },
868
+ {
869
+ "epoch": 48.69565217391305,
870
+ "grad_norm": 1.3725110292434692,
871
+ "learning_rate": 1.0490612591765711e-05,
872
+ "loss": 0.3328,
873
+ "step": 1120
874
+ },
875
+ {
876
+ "epoch": 49.130434782608695,
877
+ "grad_norm": 1.5865428447723389,
878
+ "learning_rate": 1.1989271533446528e-05,
879
+ "loss": 0.4513,
880
+ "step": 1130
881
+ },
882
+ {
883
+ "epoch": 49.56521739130435,
884
+ "grad_norm": 1.0248826742172241,
885
+ "learning_rate": 1.3487930475127345e-05,
886
+ "loss": 0.4104,
887
+ "step": 1140
888
+ },
889
+ {
890
+ "epoch": 50.0,
891
+ "grad_norm": 2.4268105030059814,
892
+ "learning_rate": 1.498658941680816e-05,
893
+ "loss": 0.3258,
894
+ "step": 1150
895
+ },
896
+ {
897
+ "epoch": 50.43478260869565,
898
+ "grad_norm": 1.2192784547805786,
899
+ "learning_rate": 1.4986587931756856e-05,
900
+ "loss": 0.3361,
901
+ "step": 1160
902
+ },
903
+ {
904
+ "epoch": 50.869565217391305,
905
+ "grad_norm": 1.552140235900879,
906
+ "learning_rate": 1.498658347660353e-05,
907
+ "loss": 0.381,
908
+ "step": 1170
909
+ },
910
+ {
911
+ "epoch": 51.30434782608695,
912
+ "grad_norm": 1.729295015335083,
913
+ "learning_rate": 1.4986576051349949e-05,
914
+ "loss": 0.3972,
915
+ "step": 1180
916
+ },
917
+ {
918
+ "epoch": 51.73913043478261,
919
+ "grad_norm": 1.041821837425232,
920
+ "learning_rate": 1.4986565655999056e-05,
921
+ "loss": 0.3202,
922
+ "step": 1190
923
+ },
924
+ {
925
+ "epoch": 52.17391304347826,
926
+ "grad_norm": 2.18989634513855,
927
+ "learning_rate": 1.4986552290554973e-05,
928
+ "loss": 0.4601,
929
+ "step": 1200
930
+ },
931
+ {
932
+ "epoch": 52.17391304347826,
933
+ "eval_loss": 0.7619463205337524,
934
+ "eval_runtime": 0.4087,
935
+ "eval_samples_per_second": 24.465,
936
+ "eval_steps_per_second": 24.465,
937
+ "step": 1200
938
+ },
939
+ {
940
+ "epoch": 52.17391304347826,
941
+ "eval_loss": 0.7558861970901489,
942
+ "eval_runtime": 0.424,
943
+ "eval_samples_per_second": 23.585,
944
+ "eval_steps_per_second": 23.585,
945
+ "step": 1200
946
+ },
947
+ {
948
+ "epoch": 52.17391304347826,
949
+ "eval_loss": 0.7579597234725952,
950
+ "eval_runtime": 0.4327,
951
+ "eval_samples_per_second": 23.111,
952
+ "eval_steps_per_second": 23.111,
953
+ "step": 1200
954
+ },
955
+ {
956
+ "epoch": 52.17391304347826,
957
+ "eval_loss": 0.7568685412406921,
958
+ "eval_runtime": 0.4348,
959
+ "eval_samples_per_second": 22.998,
960
+ "eval_steps_per_second": 22.998,
961
+ "step": 1200
962
  }
963
  ],
964
  "logging_steps": 10,
 
978
  "attributes": {}
979
  }
980
  },
981
+ "total_flos": 3.076671992345395e+16,
982
  "train_batch_size": 4,
983
  "trial_name": null,
984
  "trial_params": null