SystemAdmin123 commited on
Commit
50846f4
·
verified ·
1 Parent(s): e8d157a

Training in progress, step 240, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c79b3d2dfa1e2ff08e34bee0a50e5d26df97d53fd00ae51087689ebaf5027fe9
3
  size 136062744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57aa943ba31b7fd0f2d2258b3638435908884af1e83b7e3da9763ba67b95fa40
3
  size 136062744
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f5a1aea2e7e54386cf3ce389cec6fd8823514c6ea8045ebccf97c746d743cca
3
  size 272133748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78f609766fd9499e6af357bcd74eef24836222dee0149f07897fd8895e50aade
3
  size 272133748
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2976a2f475d8edc9e9a00ed903ec6fa861e056646565524847948c22626d681
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93e80a2275824ab49f6bc0b217bb315cd0a85d3c25b43a245828495794a78d4d
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4a98cf07637306947ea7d3f67892a3b98b5c22007d4395c1de2047ef45cd95c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:381eb7d8287a93e17a40cc15be93d534da9dbf37378fcc74868d5615daf19b34
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e06aedc7584f87414cddb2adf9cb46d6573a485741ffcbccef2e7d45ace8f8f8
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d4d987ee650d278db90b1b49f5d5e57d81bba91b4e110659d4027a225f63078
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af7c8c884b8c371ae21c399e557a74788ef7204c82d4d61283bc7a25749fb4a5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bfd04834fa55090f0aa6f19062eb69d5e7e7d567f3b51b2a09c93679da782f7
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf3bb25966486ea21e5c0eda07f93bcccad75e0a9e396cccc2d2b31e52284d21
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e15a8fd81fd90d6fe35aa6feb35c5e13dd4fe18af2950ff7fcf4c6b68016d32
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec6fb749e5390815130c1196c45f456b136c4b36339acd1006b814b606a29cec
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4d603165c2d1acc537a09a3e1f8f3831fbd36a555d1b4282034bf9a666af8e7
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6724cc3b6fa30b54ecf6969ff17f0fa6b8805e0ec29471ae97fdc0007eca256
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3b4cdeeb7d7c2d37111aeb034296baee0b0b647a48bc49f1ac03a01bf25b677
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5db64dfbdb44b1f0e091abfb9b970cb0dc413d3122f9ce2b84c891c3041b677
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:887f0c6920abc07c1199dc922f55301b3e567adfbbf72707fdf5afb2c202b331
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e961aafc4bb3a24acc13a66c3eb856682eb2bc992742878e58df3da341f94ce
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:941d9e9f4cfb6894bf574771af69c852f299b452dcf03e677dae3dadf692a003
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.7772511848341233,
5
  "eval_steps": 200,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -163,6 +163,34 @@
163
  "eval_samples_per_second": 1454.261,
164
  "eval_steps_per_second": 182.146,
165
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  }
167
  ],
168
  "logging_steps": 10,
@@ -182,7 +210,7 @@
182
  "attributes": {}
183
  }
184
  },
185
- "total_flos": 2.7236502626893824e+16,
186
  "train_batch_size": 1,
187
  "trial_name": null,
188
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.530805687203792,
5
  "eval_steps": 200,
6
+ "global_step": 240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
163
  "eval_samples_per_second": 1454.261,
164
  "eval_steps_per_second": 182.146,
165
  "step": 200
166
+ },
167
+ {
168
+ "epoch": 3.966824644549763,
169
+ "grad_norm": 1.78125,
170
+ "learning_rate": 0.00019847451999183694,
171
+ "loss": 2.1714,
172
+ "step": 210
173
+ },
174
+ {
175
+ "epoch": 4.151658767772512,
176
+ "grad_norm": 3.453125,
177
+ "learning_rate": 0.00019824084210910925,
178
+ "loss": 2.0489,
179
+ "step": 220
180
+ },
181
+ {
182
+ "epoch": 4.341232227488152,
183
+ "grad_norm": 2.296875,
184
+ "learning_rate": 0.00019799067644341844,
185
+ "loss": 1.7888,
186
+ "step": 230
187
+ },
188
+ {
189
+ "epoch": 4.530805687203792,
190
+ "grad_norm": 1.9375,
191
+ "learning_rate": 0.0001977240649801253,
192
+ "loss": 2.055,
193
+ "step": 240
194
  }
195
  ],
196
  "logging_steps": 10,
 
210
  "attributes": {}
211
  }
212
  },
213
+ "total_flos": 3.267013366723379e+16,
214
  "train_batch_size": 1,
215
  "trial_name": null,
216
  "trial_params": null