thangvip commited on
Commit
4468aff
·
verified ·
1 Parent(s): c50ee80

Training in progress, step 322, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5e97d7f8deab5684b15ac25e015cf07b4873deafd40128243449e9005c3cd82
3
  size 147770496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90be58b78feaec07f3dd645fc25daef6abb93eb2bf58d736c709779d8d9b40d1
3
  size 147770496
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce8f9b29804ad47fd460f6b94823b515a18cf2d8d1ece23860cda36616e0e958
3
  size 75455810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9e8b71d1edb3e5fd7f93f13cf098bc846fedef444351f31f957cc9fb4d52c40
3
  size 75455810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:882b179507ebc1af739e17444cb29d9c8e6428e189ae98ef0f166fb92fdff268
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3739c406ecf8641a3ed60442913b2ca4198babe9654ae86a7990d0c3f9a2542a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98ebdbbdc57822db6e58d9d3ead66e0d2c6005e998969732b2d5af225744b60b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28ac24de07ee3317eeb7358849fb34d11c4526fd90377fc538ffac93f31faefd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9950339721109327,
5
  "eval_steps": 500,
6
- "global_step": 321,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2254,6 +2254,13 @@
2254
  "learning_rate": 2.486652202848827e-10,
2255
  "loss": 1.6111,
2256
  "step": 321
 
 
 
 
 
 
 
2257
  }
2258
  ],
2259
  "logging_steps": 1.0,
@@ -2268,12 +2275,12 @@
2268
  "should_evaluate": false,
2269
  "should_log": false,
2270
  "should_save": true,
2271
- "should_training_stop": false
2272
  },
2273
  "attributes": {}
2274
  }
2275
  },
2276
- "total_flos": 2.5393388307779543e+18,
2277
  "train_batch_size": 1,
2278
  "trial_name": null,
2279
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9981337664165743,
5
  "eval_steps": 500,
6
+ "global_step": 322,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2254
  "learning_rate": 2.486652202848827e-10,
2255
  "loss": 1.6111,
2256
  "step": 321
2257
+ },
2258
+ {
2259
+ "epoch": 0.9981337664165743,
2260
+ "grad_norm": 0.0545884370803833,
2261
+ "learning_rate": 0.0,
2262
+ "loss": 1.6211,
2263
+ "step": 322
2264
  }
2265
  ],
2266
  "logging_steps": 1.0,
 
2275
  "should_evaluate": false,
2276
  "should_log": false,
2277
  "should_save": true,
2278
+ "should_training_stop": true
2279
  },
2280
  "attributes": {}
2281
  }
2282
  },
2283
+ "total_flos": 2.547514766592221e+18,
2284
  "train_batch_size": 1,
2285
  "trial_name": null,
2286
  "trial_params": null