Gaoussin committed on
Commit
fc1625b
·
verified ·
1 Parent(s): b03d130

Training in progress, step 60, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:791de39fd7b2bdff21473c42060283f782700709bf7b73c60d645581c8fe85a8
3
  size 180385008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db9d552d05fd4a02441b274102b06eb4448e823b6f11bbf626b7b2d36ec4589
3
  size 180385008
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f25f5da0a81a0919880dbc8fa722b7a85800ee6cfbb9c18d56071e165ee12ff
3
  size 91850763
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a79c6a13851c6b6000b1c5fa96cc061a579e0f7b0b01508685b720f653fc9625
3
  size 91850763
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1f3e429ffab9361eb588f03657ab12e499db270bb2234e5408b66a7fc8b7a88
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:637ef4c9ad794a6c6701f55f48eb4bd08385ff6ecc860fcf54937e38e8bf02e2
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ae2ce584500d5a7dde62b4aada302acca5720fe80f8af7eb0bfd99d0917fc53
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c146ae1cf47c9929c1f0cc98e903ce1070f0c3ea64421f26971b053d42844b7
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3427c4310ecbb2e90c59ae2422bb29ed3ca1f49696ca5f645ad56a90b60b8cb0
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22ae51817158590b7adfad82fb9a3380e5197063501e610f9eaa5c6decb93fd2
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.3873239436619718,
6
  "eval_steps": 5,
7
- "global_step": 55,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -173,6 +173,21 @@
173
  "eval_samples_per_second": 36.927,
174
  "eval_steps_per_second": 9.232,
175
  "step": 55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  }
177
  ],
178
  "logging_steps": 5,
@@ -187,12 +202,12 @@
187
  "should_evaluate": false,
188
  "should_log": false,
189
  "should_save": true,
190
- "should_training_stop": false
191
  },
192
  "attributes": {}
193
  }
194
  },
195
- "total_flos": 1169628078428160.0,
196
  "train_batch_size": 2,
197
  "trial_name": null,
198
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.4225352112676056,
6
  "eval_steps": 5,
7
+ "global_step": 60,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
173
  "eval_samples_per_second": 36.927,
174
  "eval_steps_per_second": 9.232,
175
  "step": 55
176
+ },
177
+ {
178
+ "epoch": 0.4225352112676056,
179
+ "grad_norm": 0.31429925560951233,
180
+ "learning_rate": 3.636363636363636e-06,
181
+ "loss": 3.8459,
182
+ "step": 60
183
+ },
184
+ {
185
+ "epoch": 0.4225352112676056,
186
+ "eval_loss": 5.81058406829834,
187
+ "eval_runtime": 1.5942,
188
+ "eval_samples_per_second": 37.636,
189
+ "eval_steps_per_second": 9.409,
190
+ "step": 60
191
  }
192
  ],
193
  "logging_steps": 5,
 
202
  "should_evaluate": false,
203
  "should_log": false,
204
  "should_save": true,
205
+ "should_training_stop": true
206
  },
207
  "attributes": {}
208
  }
209
  },
210
+ "total_flos": 1267677987962880.0,
211
  "train_batch_size": 2,
212
  "trial_name": null,
213
  "trial_params": null