ljcamargo commited on
Commit
ea8cc28
·
verified ·
1 Parent(s): c7b0669

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aaf0291871ea93dbea209fb2985c58523f617169fdf9dc24123ca1eeda1dd967
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0174b0116cfb9af7294711107315070cb5df5a403799c39097e36318345b480a
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b435d6de54230c9e25eca3d9b1e869fc663ecc5e4f4a0adca804b8b9be130aa1
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8804cc6d7eb057620b329159d309aa3c2e9154e9801508307ebcc5c8472c5480
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22ccf8b0d2e98bf2ffc1b552009db8d48cc34af5e23f1a080c0f3f25744ca66b
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4c90e73b569a38f99c2197447433676c2eaa22ce221aeecf0a7d6e7d0501c17
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cc5adc9ecc7f2ef4cc81bc3d2c8c79e27e6a710a89b67da3b49f2f8440c6fb5
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddc00765bb2f7c05738fc9294fd5cefadbd3816d98397372497b3027b71c424b
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.56,
6
  "eval_steps": 500,
7
- "global_step": 1400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -988,6 +988,76 @@
988
  "learning_rate": 2.2186495176848876e-05,
989
  "loss": 0.6307,
990
  "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
991
  }
992
  ],
993
  "logging_steps": 10,
@@ -1007,7 +1077,7 @@
1007
  "attributes": {}
1008
  }
1009
  },
1010
- "total_flos": 2.523584640465101e+16,
1011
  "train_batch_size": 2,
1012
  "trial_name": null,
1013
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.6,
6
  "eval_steps": 500,
7
+ "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
988
  "learning_rate": 2.2186495176848876e-05,
989
  "loss": 0.6307,
990
  "step": 1400
991
+ },
992
+ {
993
+ "epoch": 0.564,
994
+ "grad_norm": 12.709571838378906,
995
+ "learning_rate": 2.1985530546623795e-05,
996
+ "loss": 0.427,
997
+ "step": 1410
998
+ },
999
+ {
1000
+ "epoch": 0.568,
1001
+ "grad_norm": 10.895342826843262,
1002
+ "learning_rate": 2.1784565916398714e-05,
1003
+ "loss": 0.4546,
1004
+ "step": 1420
1005
+ },
1006
+ {
1007
+ "epoch": 0.572,
1008
+ "grad_norm": 13.848987579345703,
1009
+ "learning_rate": 2.1583601286173636e-05,
1010
+ "loss": 0.4159,
1011
+ "step": 1430
1012
+ },
1013
+ {
1014
+ "epoch": 0.576,
1015
+ "grad_norm": 16.61017608642578,
1016
+ "learning_rate": 2.1382636655948555e-05,
1017
+ "loss": 0.733,
1018
+ "step": 1440
1019
+ },
1020
+ {
1021
+ "epoch": 0.58,
1022
+ "grad_norm": 10.283616065979004,
1023
+ "learning_rate": 2.1181672025723473e-05,
1024
+ "loss": 0.4581,
1025
+ "step": 1450
1026
+ },
1027
+ {
1028
+ "epoch": 0.584,
1029
+ "grad_norm": 11.248019218444824,
1030
+ "learning_rate": 2.0980707395498395e-05,
1031
+ "loss": 0.462,
1032
+ "step": 1460
1033
+ },
1034
+ {
1035
+ "epoch": 0.588,
1036
+ "grad_norm": 10.817742347717285,
1037
+ "learning_rate": 2.0779742765273314e-05,
1038
+ "loss": 0.451,
1039
+ "step": 1470
1040
+ },
1041
+ {
1042
+ "epoch": 0.592,
1043
+ "grad_norm": 10.615836143493652,
1044
+ "learning_rate": 2.0578778135048233e-05,
1045
+ "loss": 0.5714,
1046
+ "step": 1480
1047
+ },
1048
+ {
1049
+ "epoch": 0.596,
1050
+ "grad_norm": 12.22169017791748,
1051
+ "learning_rate": 2.037781350482315e-05,
1052
+ "loss": 0.8718,
1053
+ "step": 1490
1054
+ },
1055
+ {
1056
+ "epoch": 0.6,
1057
+ "grad_norm": 13.216890335083008,
1058
+ "learning_rate": 2.0176848874598074e-05,
1059
+ "loss": 0.3766,
1060
+ "step": 1500
1061
  }
1062
  ],
1063
  "logging_steps": 10,
 
1077
  "attributes": {}
1078
  }
1079
  },
1080
+ "total_flos": 2.704157205966029e+16,
1081
  "train_batch_size": 2,
1082
  "trial_name": null,
1083
  "trial_params": null