ljcamargo commited on
Commit
511712d
·
verified ·
1 Parent(s): a9d0bf9

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ad9704bc5747fc17b3c1496ffdc9c4a82ee0c2ceb16f4e948b6593950765fc1
3
  size 2558403928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19f020f57d4a61360034ec006ae66facade0f2d21653389e6f14602a4142a050
3
  size 2558403928
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18ce1ad6cf71b73814b98c18ffc3d1dbf6b9d7b64f05e65823d86e41a0a2a0f4
3
  size 1313638993
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc606d36977ccbf3d4b122958eb1f61737f84fd18ecaaffc4930cc8fd07e9364
3
  size 1313638993
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68e939733619667823ff09361a70b450356b35690c073061e24545321b21c4b0
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b62ca0429ec7faf1375b45507b4b6556f7e8ec80e94acc260c0727d9c01b414c
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18783150ac09b6b81cea5af47876a10bfe5f36c3d76aca4ffce5382bdfaf7b28
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca372268f4fa9335030c0cb7aedb6cdba75f457da50e7a4034abb1a2d0843689
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2c4ff631d77bc2fe5cad879e6c434ab3b6d8a7e0b9cce252cee47e42bdf838a
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa39772c5e0acad2bda0112b852159f96e3b408cefd615906b49c8dfa5e48e61
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.96,
6
  "eval_steps": 500,
7
- "global_step": 1200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -855,6 +855,216 @@
855
  "learning_rate": 0.00011346416107813267,
856
  "loss": 5.9226,
857
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
858
  }
859
  ],
860
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.2,
6
  "eval_steps": 500,
7
+ "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
855
  "learning_rate": 0.00011346416107813267,
856
  "loss": 5.9226,
857
  "step": 1200
858
+ },
859
+ {
860
+ "epoch": 0.968,
861
+ "grad_norm": 7.974459648132324,
862
+ "learning_rate": 0.00011216379785160578,
863
+ "loss": 5.8474,
864
+ "step": 1210
865
+ },
866
+ {
867
+ "epoch": 0.976,
868
+ "grad_norm": 5.838559150695801,
869
+ "learning_rate": 0.000110861343456282,
870
+ "loss": 6.0194,
871
+ "step": 1220
872
+ },
873
+ {
874
+ "epoch": 0.984,
875
+ "grad_norm": 7.7551703453063965,
876
+ "learning_rate": 0.00010955702180676632,
877
+ "loss": 5.7078,
878
+ "step": 1230
879
+ },
880
+ {
881
+ "epoch": 0.992,
882
+ "grad_norm": 7.658422470092773,
883
+ "learning_rate": 0.00010825105713867724,
884
+ "loss": 5.6615,
885
+ "step": 1240
886
+ },
887
+ {
888
+ "epoch": 1.0,
889
+ "grad_norm": 6.197235584259033,
890
+ "learning_rate": 0.0001069436739700968,
891
+ "loss": 5.7792,
892
+ "step": 1250
893
+ },
894
+ {
895
+ "epoch": 1.008,
896
+ "grad_norm": 6.991013526916504,
897
+ "learning_rate": 0.00010563509706297188,
898
+ "loss": 5.3963,
899
+ "step": 1260
900
+ },
901
+ {
902
+ "epoch": 1.016,
903
+ "grad_norm": 7.606940269470215,
904
+ "learning_rate": 0.00010432555138447404,
905
+ "loss": 5.9168,
906
+ "step": 1270
907
+ },
908
+ {
909
+ "epoch": 1.024,
910
+ "grad_norm": 6.3427042961120605,
911
+ "learning_rate": 0.0001030152620683233,
912
+ "loss": 5.6694,
913
+ "step": 1280
914
+ },
915
+ {
916
+ "epoch": 1.032,
917
+ "grad_norm": 5.197830677032471,
918
+ "learning_rate": 0.00010170445437608403,
919
+ "loss": 5.4628,
920
+ "step": 1290
921
+ },
922
+ {
923
+ "epoch": 1.04,
924
+ "grad_norm": 7.1471381187438965,
925
+ "learning_rate": 0.00010039335365843851,
926
+ "loss": 5.4371,
927
+ "step": 1300
928
+ },
929
+ {
930
+ "epoch": 1.048,
931
+ "grad_norm": 6.546257495880127,
932
+ "learning_rate": 9.908218531644521e-05,
933
+ "loss": 5.7931,
934
+ "step": 1310
935
+ },
936
+ {
937
+ "epoch": 1.056,
938
+ "grad_norm": 8.735387802124023,
939
+ "learning_rate": 9.77711747627883e-05,
940
+ "loss": 5.354,
941
+ "step": 1320
942
+ },
943
+ {
944
+ "epoch": 1.064,
945
+ "grad_norm": 6.352960109710693,
946
+ "learning_rate": 9.646054738302551e-05,
947
+ "loss": 5.0677,
948
+ "step": 1330
949
+ },
950
+ {
951
+ "epoch": 1.072,
952
+ "grad_norm": 7.88266658782959,
953
+ "learning_rate": 9.515052849684019e-05,
954
+ "loss": 5.4004,
955
+ "step": 1340
956
+ },
957
+ {
958
+ "epoch": 1.08,
959
+ "grad_norm": 5.571359634399414,
960
+ "learning_rate": 9.384134331930513e-05,
961
+ "loss": 5.3187,
962
+ "step": 1350
963
+ },
964
+ {
965
+ "epoch": 1.088,
966
+ "grad_norm": 8.573116302490234,
967
+ "learning_rate": 9.253321692216402e-05,
968
+ "loss": 5.2719,
969
+ "step": 1360
970
+ },
971
+ {
972
+ "epoch": 1.096,
973
+ "grad_norm": 9.162277221679688,
974
+ "learning_rate": 9.122637419513778e-05,
975
+ "loss": 5.6781,
976
+ "step": 1370
977
+ },
978
+ {
979
+ "epoch": 1.104,
980
+ "grad_norm": 11.947822570800781,
981
+ "learning_rate": 8.992103980726207e-05,
982
+ "loss": 5.593,
983
+ "step": 1380
984
+ },
985
+ {
986
+ "epoch": 1.112,
987
+ "grad_norm": 8.124156951904297,
988
+ "learning_rate": 8.861743816826274e-05,
989
+ "loss": 5.3142,
990
+ "step": 1390
991
+ },
992
+ {
993
+ "epoch": 1.12,
994
+ "grad_norm": 7.134088039398193,
995
+ "learning_rate": 8.731579338997594e-05,
996
+ "loss": 5.3372,
997
+ "step": 1400
998
+ },
999
+ {
1000
+ "epoch": 1.1280000000000001,
1001
+ "grad_norm": 8.928452491760254,
1002
+ "learning_rate": 8.601632924781935e-05,
1003
+ "loss": 5.4594,
1004
+ "step": 1410
1005
+ },
1006
+ {
1007
+ "epoch": 1.1360000000000001,
1008
+ "grad_norm": 5.617236614227295,
1009
+ "learning_rate": 8.471926914232137e-05,
1010
+ "loss": 5.0226,
1011
+ "step": 1420
1012
+ },
1013
+ {
1014
+ "epoch": 1.144,
1015
+ "grad_norm": 6.080244064331055,
1016
+ "learning_rate": 8.34248360607145e-05,
1017
+ "loss": 5.3677,
1018
+ "step": 1430
1019
+ },
1020
+ {
1021
+ "epoch": 1.152,
1022
+ "grad_norm": 9.846885681152344,
1023
+ "learning_rate": 8.213325253860013e-05,
1024
+ "loss": 5.2629,
1025
+ "step": 1440
1026
+ },
1027
+ {
1028
+ "epoch": 1.16,
1029
+ "grad_norm": 10.537776947021484,
1030
+ "learning_rate": 8.084474062169071e-05,
1031
+ "loss": 5.5022,
1032
+ "step": 1450
1033
+ },
1034
+ {
1035
+ "epoch": 1.168,
1036
+ "grad_norm": 5.898927688598633,
1037
+ "learning_rate": 7.955952182763624e-05,
1038
+ "loss": 5.2349,
1039
+ "step": 1460
1040
+ },
1041
+ {
1042
+ "epoch": 1.176,
1043
+ "grad_norm": 6.621062755584717,
1044
+ "learning_rate": 7.827781710794146e-05,
1045
+ "loss": 5.4929,
1046
+ "step": 1470
1047
+ },
1048
+ {
1049
+ "epoch": 1.184,
1050
+ "grad_norm": 6.793276309967041,
1051
+ "learning_rate": 7.699984680998063e-05,
1052
+ "loss": 5.136,
1053
+ "step": 1480
1054
+ },
1055
+ {
1056
+ "epoch": 1.192,
1057
+ "grad_norm": 8.44278335571289,
1058
+ "learning_rate": 7.57258306391157e-05,
1059
+ "loss": 4.9147,
1060
+ "step": 1490
1061
+ },
1062
+ {
1063
+ "epoch": 1.2,
1064
+ "grad_norm": 8.787193298339844,
1065
+ "learning_rate": 7.445598762092537e-05,
1066
+ "loss": 5.4473,
1067
+ "step": 1500
1068
  }
1069
  ],
1070
  "logging_steps": 10,
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee90b3fca998c33de74462bb9e763a66d85eea62f73bbd6b86fed468dacef643
3
  size 5905
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55ca17433b5f6660ff524e613cd483a70dd34c9a3734b5a0b15911c69ef6f917
3
  size 5905