ljcamargo commited on
Commit
e362f9c
·
verified ·
1 Parent(s): f39eb83

Training in progress, step 1800, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3919cab12afb691f8e2bde9aed0bdad3628d6f1a5ecae97beb9b67f52859024e
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:229e19659100db88dd521b24c7f3783cb59725c077f5c58e0b1e8cbed6566cad
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65cec01b065a22732babc2be6945a5935ab48f5f41fd2fba8b539e6256b0dfa7
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:086d4ae403b4653b546f4d669e2f8c98a6c3bc786f7ff28201dea70b2067e4f2
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da57c8097b451ef1168f1b0191d0689aff1a3bd0997413b1e9eeee0934b0b53c
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1547aae10ac7691e1716f567b08e3b4d274fa923879a48af8c2bb55c815a28a2
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d5ccf396d48a7891c1332094feb71b5d5d1edce123ef8038fc290770c5e3a02
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdd5d251a495085a19339ae2e6833dedf33f9b2050a0e70b16dd4cd5da2b7a12
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.68,
6
  "eval_steps": 500,
7
- "global_step": 1700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1198,6 +1198,76 @@
1198
  "learning_rate": 1.6157556270096464e-05,
1199
  "loss": 0.4807,
1200
  "step": 1700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1201
  }
1202
  ],
1203
  "logging_steps": 10,
@@ -1217,7 +1287,7 @@
1217
  "attributes": {}
1218
  }
1219
  },
1220
- "total_flos": 3.0652319992449024e+16,
1221
  "train_batch_size": 2,
1222
  "trial_name": null,
1223
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.72,
6
  "eval_steps": 500,
7
+ "global_step": 1800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1198
  "learning_rate": 1.6157556270096464e-05,
1199
  "loss": 0.4807,
1200
  "step": 1700
1201
+ },
1202
+ {
1203
+ "epoch": 0.684,
1204
+ "grad_norm": 9.49954891204834,
1205
+ "learning_rate": 1.5956591639871383e-05,
1206
+ "loss": 0.3647,
1207
+ "step": 1710
1208
+ },
1209
+ {
1210
+ "epoch": 0.688,
1211
+ "grad_norm": 14.690208435058594,
1212
+ "learning_rate": 1.5755627009646305e-05,
1213
+ "loss": 0.3715,
1214
+ "step": 1720
1215
+ },
1216
+ {
1217
+ "epoch": 0.692,
1218
+ "grad_norm": 12.074922561645508,
1219
+ "learning_rate": 1.5554662379421224e-05,
1220
+ "loss": 0.491,
1221
+ "step": 1730
1222
+ },
1223
+ {
1224
+ "epoch": 0.696,
1225
+ "grad_norm": 13.278485298156738,
1226
+ "learning_rate": 1.5353697749196143e-05,
1227
+ "loss": 0.4185,
1228
+ "step": 1740
1229
+ },
1230
+ {
1231
+ "epoch": 0.7,
1232
+ "grad_norm": 12.987263679504395,
1233
+ "learning_rate": 1.5152733118971063e-05,
1234
+ "loss": 0.5613,
1235
+ "step": 1750
1236
+ },
1237
+ {
1238
+ "epoch": 0.704,
1239
+ "grad_norm": 6.863049030303955,
1240
+ "learning_rate": 1.4951768488745982e-05,
1241
+ "loss": 0.3245,
1242
+ "step": 1760
1243
+ },
1244
+ {
1245
+ "epoch": 0.708,
1246
+ "grad_norm": 11.087668418884277,
1247
+ "learning_rate": 1.47508038585209e-05,
1248
+ "loss": 0.4174,
1249
+ "step": 1770
1250
+ },
1251
+ {
1252
+ "epoch": 0.712,
1253
+ "grad_norm": 5.16309118270874,
1254
+ "learning_rate": 1.4549839228295819e-05,
1255
+ "loss": 0.3233,
1256
+ "step": 1780
1257
+ },
1258
+ {
1259
+ "epoch": 0.716,
1260
+ "grad_norm": 12.031776428222656,
1261
+ "learning_rate": 1.4348874598070741e-05,
1262
+ "loss": 0.3574,
1263
+ "step": 1790
1264
+ },
1265
+ {
1266
+ "epoch": 0.72,
1267
+ "grad_norm": 13.569413185119629,
1268
+ "learning_rate": 1.414790996784566e-05,
1269
+ "loss": 0.5619,
1270
+ "step": 1800
1271
  }
1272
  ],
1273
  "logging_steps": 10,
 
1287
  "attributes": {}
1288
  }
1289
  },
1290
+ "total_flos": 3.2448823590445056e+16,
1291
  "train_batch_size": 2,
1292
  "trial_name": null,
1293
  "trial_params": null