ljcamargo commited on
Commit
5098961
·
verified ·
1 Parent(s): 960703a

Training in progress, step 1700, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:261060049a7dda557484b2457355c565a4a3dec4bad82ebd69d3da19bb63baea
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3919cab12afb691f8e2bde9aed0bdad3628d6f1a5ecae97beb9b67f52859024e
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd5a824004382fafc96a45aa34398b26619eb1ad1e0bae9e057d7991c60713ca
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65cec01b065a22732babc2be6945a5935ab48f5f41fd2fba8b539e6256b0dfa7
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66ee06c65c8a9fafa99e46b2c6a4e28ded33e6954d7a37d2b23e4b02c9c3171d
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da57c8097b451ef1168f1b0191d0689aff1a3bd0997413b1e9eeee0934b0b53c
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c656881e8cfeb75270ca3557627f44c2ff0c812b9a941b53a9228574ee283934
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d5ccf396d48a7891c1332094feb71b5d5d1edce123ef8038fc290770c5e3a02
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.64,
6
  "eval_steps": 500,
7
- "global_step": 1600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1128,6 +1128,76 @@
1128
  "learning_rate": 1.8167202572347267e-05,
1129
  "loss": 0.3986,
1130
  "step": 1600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1131
  }
1132
  ],
1133
  "logging_steps": 10,
@@ -1147,7 +1217,7 @@
1147
  "attributes": {}
1148
  }
1149
  },
1150
- "total_flos": 2.8862224942546944e+16,
1151
  "train_batch_size": 2,
1152
  "trial_name": null,
1153
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.68,
6
  "eval_steps": 500,
7
+ "global_step": 1700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1128
  "learning_rate": 1.8167202572347267e-05,
1129
  "loss": 0.3986,
1130
  "step": 1600
1131
+ },
1132
+ {
1133
+ "epoch": 0.644,
1134
+ "grad_norm": 13.649085998535156,
1135
+ "learning_rate": 1.796623794212219e-05,
1136
+ "loss": 0.4337,
1137
+ "step": 1610
1138
+ },
1139
+ {
1140
+ "epoch": 0.648,
1141
+ "grad_norm": 7.645134449005127,
1142
+ "learning_rate": 1.7765273311897108e-05,
1143
+ "loss": 0.3901,
1144
+ "step": 1620
1145
+ },
1146
+ {
1147
+ "epoch": 0.652,
1148
+ "grad_norm": 11.727263450622559,
1149
+ "learning_rate": 1.7564308681672027e-05,
1150
+ "loss": 0.3545,
1151
+ "step": 1630
1152
+ },
1153
+ {
1154
+ "epoch": 0.656,
1155
+ "grad_norm": 6.705881595611572,
1156
+ "learning_rate": 1.736334405144695e-05,
1157
+ "loss": 0.3471,
1158
+ "step": 1640
1159
+ },
1160
+ {
1161
+ "epoch": 0.66,
1162
+ "grad_norm": 12.363304138183594,
1163
+ "learning_rate": 1.7162379421221868e-05,
1164
+ "loss": 0.4351,
1165
+ "step": 1650
1166
+ },
1167
+ {
1168
+ "epoch": 0.664,
1169
+ "grad_norm": 20.208723068237305,
1170
+ "learning_rate": 1.6961414790996786e-05,
1171
+ "loss": 0.4284,
1172
+ "step": 1660
1173
+ },
1174
+ {
1175
+ "epoch": 0.668,
1176
+ "grad_norm": 10.82363224029541,
1177
+ "learning_rate": 1.6760450160771705e-05,
1178
+ "loss": 0.3369,
1179
+ "step": 1670
1180
+ },
1181
+ {
1182
+ "epoch": 0.672,
1183
+ "grad_norm": 9.544486045837402,
1184
+ "learning_rate": 1.6559485530546627e-05,
1185
+ "loss": 0.4059,
1186
+ "step": 1680
1187
+ },
1188
+ {
1189
+ "epoch": 0.676,
1190
+ "grad_norm": 8.426627159118652,
1191
+ "learning_rate": 1.6358520900321546e-05,
1192
+ "loss": 0.4494,
1193
+ "step": 1690
1194
+ },
1195
+ {
1196
+ "epoch": 0.68,
1197
+ "grad_norm": 8.424084663391113,
1198
+ "learning_rate": 1.6157556270096464e-05,
1199
+ "loss": 0.4807,
1200
+ "step": 1700
1201
  }
1202
  ],
1203
  "logging_steps": 10,
 
1217
  "attributes": {}
1218
  }
1219
  },
1220
+ "total_flos": 3.0652319992449024e+16,
1221
  "train_batch_size": 2,
1222
  "trial_name": null,
1223
  "trial_params": null