ljcamargo commited on
Commit
ea186f9
·
verified ·
1 Parent(s): 53c9e65

Training in progress, step 1600, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0174b0116cfb9af7294711107315070cb5df5a403799c39097e36318345b480a
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:261060049a7dda557484b2457355c565a4a3dec4bad82ebd69d3da19bb63baea
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8804cc6d7eb057620b329159d309aa3c2e9154e9801508307ebcc5c8472c5480
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd5a824004382fafc96a45aa34398b26619eb1ad1e0bae9e057d7991c60713ca
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4c90e73b569a38f99c2197447433676c2eaa22ce221aeecf0a7d6e7d0501c17
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66ee06c65c8a9fafa99e46b2c6a4e28ded33e6954d7a37d2b23e4b02c9c3171d
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddc00765bb2f7c05738fc9294fd5cefadbd3816d98397372497b3027b71c424b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c656881e8cfeb75270ca3557627f44c2ff0c812b9a941b53a9228574ee283934
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.6,
6
  "eval_steps": 500,
7
- "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1058,6 +1058,76 @@
1058
  "learning_rate": 2.0176848874598074e-05,
1059
  "loss": 0.3766,
1060
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1061
  }
1062
  ],
1063
  "logging_steps": 10,
@@ -1077,7 +1147,7 @@
1077
  "attributes": {}
1078
  }
1079
  },
1080
- "total_flos": 2.704157205966029e+16,
1081
  "train_batch_size": 2,
1082
  "trial_name": null,
1083
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.64,
6
  "eval_steps": 500,
7
+ "global_step": 1600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1058
  "learning_rate": 2.0176848874598074e-05,
1059
  "loss": 0.3766,
1060
  "step": 1500
1061
+ },
1062
+ {
1063
+ "epoch": 0.604,
1064
+ "grad_norm": 13.472972869873047,
1065
+ "learning_rate": 1.9975884244372992e-05,
1066
+ "loss": 0.5891,
1067
+ "step": 1510
1068
+ },
1069
+ {
1070
+ "epoch": 0.608,
1071
+ "grad_norm": 20.179187774658203,
1072
+ "learning_rate": 1.977491961414791e-05,
1073
+ "loss": 0.3744,
1074
+ "step": 1520
1075
+ },
1076
+ {
1077
+ "epoch": 0.612,
1078
+ "grad_norm": 12.630617141723633,
1079
+ "learning_rate": 1.957395498392283e-05,
1080
+ "loss": 0.4564,
1081
+ "step": 1530
1082
+ },
1083
+ {
1084
+ "epoch": 0.616,
1085
+ "grad_norm": 20.459508895874023,
1086
+ "learning_rate": 1.9372990353697752e-05,
1087
+ "loss": 0.4566,
1088
+ "step": 1540
1089
+ },
1090
+ {
1091
+ "epoch": 0.62,
1092
+ "grad_norm": 16.580251693725586,
1093
+ "learning_rate": 1.917202572347267e-05,
1094
+ "loss": 0.447,
1095
+ "step": 1550
1096
+ },
1097
+ {
1098
+ "epoch": 0.624,
1099
+ "grad_norm": 13.90858268737793,
1100
+ "learning_rate": 1.897106109324759e-05,
1101
+ "loss": 0.4133,
1102
+ "step": 1560
1103
+ },
1104
+ {
1105
+ "epoch": 0.628,
1106
+ "grad_norm": 10.297750473022461,
1107
+ "learning_rate": 1.877009646302251e-05,
1108
+ "loss": 0.9703,
1109
+ "step": 1570
1110
+ },
1111
+ {
1112
+ "epoch": 0.632,
1113
+ "grad_norm": 19.886884689331055,
1114
+ "learning_rate": 1.856913183279743e-05,
1115
+ "loss": 0.6253,
1116
+ "step": 1580
1117
+ },
1118
+ {
1119
+ "epoch": 0.636,
1120
+ "grad_norm": 10.709681510925293,
1121
+ "learning_rate": 1.836816720257235e-05,
1122
+ "loss": 0.4657,
1123
+ "step": 1590
1124
+ },
1125
+ {
1126
+ "epoch": 0.64,
1127
+ "grad_norm": 16.986331939697266,
1128
+ "learning_rate": 1.8167202572347267e-05,
1129
+ "loss": 0.3986,
1130
+ "step": 1600
1131
  }
1132
  ],
1133
  "logging_steps": 10,
 
1147
  "attributes": {}
1148
  }
1149
  },
1150
+ "total_flos": 2.8862224942546944e+16,
1151
  "train_batch_size": 2,
1152
  "trial_name": null,
1153
  "trial_params": null