mgh6 committed on
Commit
730f303
·
verified ·
1 Parent(s): 72b5738

Training in progress, step 14600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df153e2b68d2a9610611bbea018029c9ed4e9f282e1501e464298b99d68edaeb
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cac6a6f7a31e5da389a45bb3c9f082ba3080e29ab9fc8d8efbfd63df38debee
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da99fdde9c2172b84053582c25d7a3ca04541c3317694556f84d04188f3607c3
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ac03cb4783a47c0a95be9f4a129f0f52588882668ede0ca0972e32f6e8d7fa2
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32dd23f8bccd4e3ea7c87c279b793f3dc99695bcc2a9ced7f6f6b74af90f2794
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa6c6ef1a13072691bc9776f605bb9e006b751f451537eba6fdf8f832f1aa17f
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7dbcc644babab78a4f1af1646b1c89168aace3f006aa80fe27a175849c621a79
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f44328417ddaab62e319c451bb3d9fe74cb7a3ed5a7dc379d94953a6ec72b89f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 28.995469457897205,
5
  "eval_steps": 500,
6
- "global_step": 14500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1254,6 +1254,13 @@
1254
  "eval_samples_per_second": 19.903,
1255
  "eval_steps_per_second": 2.489,
1256
  "step": 14500
 
 
 
 
 
 
 
1257
  }
1258
  ],
1259
  "logging_steps": 100,
@@ -1273,7 +1280,7 @@
1273
  "attributes": {}
1274
  }
1275
  },
1276
- "total_flos": 2.2630614429916463e+19,
1277
  "train_batch_size": 8,
1278
  "trial_name": null,
1279
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 29.195438212779255,
5
  "eval_steps": 500,
6
+ "global_step": 14600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1254
  "eval_samples_per_second": 19.903,
1255
  "eval_steps_per_second": 2.489,
1256
  "step": 14500
1257
+ },
1258
+ {
1259
+ "epoch": 29.195438212779255,
1260
+ "grad_norm": 0.24403980374336243,
1261
+ "learning_rate": 2.666666666666667e-06,
1262
+ "loss": 1.2162,
1263
+ "step": 14600
1264
  }
1265
  ],
1266
  "logging_steps": 100,
 
1280
  "attributes": {}
1281
  }
1282
  },
1283
+ "total_flos": 2.2786680244234355e+19,
1284
  "train_batch_size": 8,
1285
  "trial_name": null,
1286
  "trial_params": null