SystemAdmin123 committed on
Commit
1d276ae
·
verified ·
1 Parent(s): c6dbd0c

Training in progress, step 240, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fbfedb128c3a8a90f3c52d5cb8498cfc4a1cc5679a309d2ba9e24f4e116613c
3
  size 250490408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f317c2f9c0e1904127f0b0ce934aa07af2b7229ad58b9705ab9439b52063d79b
3
  size 250490408
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8aaa39867ef9a795fc2fde473bb2d03cac009f05ada558ae7564b3b55853713c
3
  size 255265850
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef4793eaa5e68cb9e4770c48af11f8b45736f6d545fc0af49f5658489d799143
3
  size 255265850
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7333eda3fb157e4797c9458ec4e20fb25f3cea65d779dd1876520831c6fa7301
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc199d50ee7acd60b0e618740a19710d9cb29c2225fb47541ee35fdc79228cad
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bddfe446a5a284cd678d00b6a2a398b1ec19fbab21ddba1394b97157f0314350
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64f87fdf6af8f44bb6d6b07cf2d135d04fedbac4a4a784047f9932793daf148b
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:134925db3ae71cb88cb92756cc2cdea3066935b4ecbf838e78bbfa60c95c3818
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:108ec0968af236f6199c1f716a40487225162213cb414b717c6f1aeb82bcbc29
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 18.333333333333332,
5
  "eval_steps": 40,
6
- "global_step": 220,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -209,6 +209,28 @@
209
  "learning_rate": 0.00015000000000000001,
210
  "loss": 3.1968,
211
  "step": 220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  }
213
  ],
214
  "logging_steps": 10,
@@ -228,7 +250,7 @@
228
  "attributes": {}
229
  }
230
  },
231
- "total_flos": 1.425922867593216e+16,
232
  "train_batch_size": 32,
233
  "trial_name": null,
234
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
  "eval_steps": 40,
6
+ "global_step": 240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
209
  "learning_rate": 0.00015000000000000001,
210
  "loss": 3.1968,
211
  "step": 220
212
+ },
213
+ {
214
+ "epoch": 19.166666666666668,
215
+ "grad_norm": 4.03125,
216
+ "learning_rate": 0.00014515333583108896,
217
+ "loss": 3.1189,
218
+ "step": 230
219
+ },
220
+ {
221
+ "epoch": 20.0,
222
+ "grad_norm": 3.75,
223
+ "learning_rate": 0.00014016954246529696,
224
+ "loss": 3.0702,
225
+ "step": 240
226
+ },
227
+ {
228
+ "epoch": 20.0,
229
+ "eval_loss": 3.0596697330474854,
230
+ "eval_runtime": 10.3849,
231
+ "eval_samples_per_second": 144.537,
232
+ "eval_steps_per_second": 2.311,
233
+ "step": 240
234
  }
235
  ],
236
  "logging_steps": 10,
 
250
  "attributes": {}
251
  }
252
  },
253
+ "total_flos": 1.5547920996630528e+16,
254
  "train_batch_size": 32,
255
  "trial_name": null,
256
  "trial_params": null