irishprancer committed on
Commit
a037ec4
·
verified ·
1 Parent(s): 6945279

Training in progress, step 1350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:246e9041923ae16d3d22bd9faefa3a81fe679256e2ed44291e0fcea75e5265f1
3
  size 2188456160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dff843e8014d50d10bec270ddda8dc2e6baebdf793fcd67eebced3c57e02ad40
3
  size 2188456160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce0ac0837ca1e42216338266b2c5241e9a95dc9449a1ab47eae7bd131905307d
3
- size 4296624666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de140360c04781c2fcf93ce4113505bf5ea2998baa46973cb9bf4f062ac6fc59
3
+ size 4296624602
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34ec58efab92925c00db5b872c2e5259dda40fae6665408b7a0f28ca940720e1
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29772944da4342f7d17c5a95ac8b46ce227fc421640ea452e90feaa067616349
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36ab02583668716ccf13b863e3e39d3ef8a92d21998439cfdb5fbc8f0b8524ae
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4427ef89ca6aa55d60bb3bbc2571987bf404749fd996b7c1462919265670accf
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.5532429218292236,
3
- "best_model_checkpoint": "./output/checkpoint-1200",
4
- "epoch": 0.043120485824140284,
5
  "eval_steps": 150,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -911,6 +911,119 @@
911
  "eval_samples_per_second": 4.395,
912
  "eval_steps_per_second": 4.395,
913
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
914
  }
915
  ],
916
  "logging_steps": 10,
@@ -930,7 +1043,7 @@
930
  "attributes": {}
931
  }
932
  },
933
- "total_flos": 2.2333058127464448e+17,
934
  "train_batch_size": 2,
935
  "trial_name": null,
936
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.5468966960906982,
3
+ "best_model_checkpoint": "./output/checkpoint-1350",
4
+ "epoch": 0.04851054655215782,
5
  "eval_steps": 150,
6
+ "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
911
  "eval_samples_per_second": 4.395,
912
  "eval_steps_per_second": 4.395,
913
  "step": 1200
914
+ },
915
+ {
916
+ "epoch": 0.04347982320600812,
917
+ "grad_norm": 20.928686141967773,
918
+ "learning_rate": 1.964692368656166e-05,
919
+ "loss": 1.603,
920
+ "step": 1210
921
+ },
922
+ {
923
+ "epoch": 0.04383916058787596,
924
+ "grad_norm": 16.554222106933594,
925
+ "learning_rate": 1.9599934836996435e-05,
926
+ "loss": 1.3694,
927
+ "step": 1220
928
+ },
929
+ {
930
+ "epoch": 0.044198497969743795,
931
+ "grad_norm": 17.05143165588379,
932
+ "learning_rate": 1.9552599890860126e-05,
933
+ "loss": 1.7721,
934
+ "step": 1230
935
+ },
936
+ {
937
+ "epoch": 0.044557835351611626,
938
+ "grad_norm": 16.701725006103516,
939
+ "learning_rate": 1.9504920793906985e-05,
940
+ "loss": 1.6165,
941
+ "step": 1240
942
+ },
943
+ {
944
+ "epoch": 0.04491717273347946,
945
+ "grad_norm": 16.80068588256836,
946
+ "learning_rate": 1.945689950603793e-05,
947
+ "loss": 1.6613,
948
+ "step": 1250
949
+ },
950
+ {
951
+ "epoch": 0.0452765101153473,
952
+ "grad_norm": 16.713293075561523,
953
+ "learning_rate": 1.9408538001220032e-05,
954
+ "loss": 1.4332,
955
+ "step": 1260
956
+ },
957
+ {
958
+ "epoch": 0.04563584749721514,
959
+ "grad_norm": 15.256826400756836,
960
+ "learning_rate": 1.9359838267405318e-05,
961
+ "loss": 1.6143,
962
+ "step": 1270
963
+ },
964
+ {
965
+ "epoch": 0.04599518487908297,
966
+ "grad_norm": 16.193984985351562,
967
+ "learning_rate": 1.931080230644911e-05,
968
+ "loss": 1.4749,
969
+ "step": 1280
970
+ },
971
+ {
972
+ "epoch": 0.046354522260950805,
973
+ "grad_norm": 15.51556396484375,
974
+ "learning_rate": 1.926143213402771e-05,
975
+ "loss": 1.8499,
976
+ "step": 1290
977
+ },
978
+ {
979
+ "epoch": 0.04671385964281864,
980
+ "grad_norm": 14.606731414794922,
981
+ "learning_rate": 1.921172977955552e-05,
982
+ "loss": 1.6146,
983
+ "step": 1300
984
+ },
985
+ {
986
+ "epoch": 0.04707319702468648,
987
+ "grad_norm": 18.136018753051758,
988
+ "learning_rate": 1.9161697286101677e-05,
989
+ "loss": 1.6007,
990
+ "step": 1310
991
+ },
992
+ {
993
+ "epoch": 0.047432534406554316,
994
+ "grad_norm": 17.7342472076416,
995
+ "learning_rate": 1.9111336710306013e-05,
996
+ "loss": 1.5332,
997
+ "step": 1320
998
+ },
999
+ {
1000
+ "epoch": 0.04779187178842215,
1001
+ "grad_norm": 12.427562713623047,
1002
+ "learning_rate": 1.9060650122294554e-05,
1003
+ "loss": 1.7003,
1004
+ "step": 1330
1005
+ },
1006
+ {
1007
+ "epoch": 0.048151209170289984,
1008
+ "grad_norm": 13.867376327514648,
1009
+ "learning_rate": 1.9009639605594407e-05,
1010
+ "loss": 1.4822,
1011
+ "step": 1340
1012
+ },
1013
+ {
1014
+ "epoch": 0.04851054655215782,
1015
+ "grad_norm": 18.720388412475586,
1016
+ "learning_rate": 1.8958307257048116e-05,
1017
+ "loss": 1.4579,
1018
+ "step": 1350
1019
+ },
1020
+ {
1021
+ "epoch": 0.04851054655215782,
1022
+ "eval_loss": 1.5468966960906982,
1023
+ "eval_runtime": 113.6859,
1024
+ "eval_samples_per_second": 4.398,
1025
+ "eval_steps_per_second": 4.398,
1026
+ "step": 1350
1027
  }
1028
  ],
1029
  "logging_steps": 10,
 
1043
  "attributes": {}
1044
  }
1045
  },
1046
+ "total_flos": 2.5148543849914368e+17,
1047
  "train_batch_size": 2,
1048
  "trial_name": null,
1049
  "trial_params": null