Fanucci committed on
Commit
3d9be26
·
verified ·
1 Parent(s): 674480b

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0eec55da26257e6bdd7e6c8e36ecffb5a79021aeb51150dc5b63a96d4ddfa618
3
  size 4995335576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:126f178711fbc9dc02adb12aecf7c2e316048690bf9f666ebf5e533924ca5759
3
  size 4995335576
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5618717dfd829dd7d8596a68eaff8f83a9c2c913dcbefd2a76b79925f393192
3
  size 1857639032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a6a27ff45511a566f230b94929be575d47e80f94f11e46684241a9807432ad
3
  size 1857639032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8ae689a4a2512f4173eeb1a100be1c1ac54ba038fbc329032c57fb26365379e
3
  size 13706103974
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0c0971e1ca60fceb5fa43e0a83baf4624d8bf23bd31003292a1606de1c74861
3
  size 13706103974
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecefbb3f17bb76b6655eb0157c98b5287c17fa4b4c72a6b9068b0823ce9fd18d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95b6047bd8cc6f4cdf7c46dea47edb8e542435510070c6cd1e0a7d9ccf5fd7da
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76d2b2a58cb1fef054c26ee40b50f34fb3a71e56ece66a18947891aede843123
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95d2a50364bc7f3078838455f012433d9b89075114f0547db53c7e328906dec9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.009369144284821987,
6
  "eval_steps": 50,
7
- "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -145,6 +145,49 @@
145
  "eval_samples_per_second": 15.379,
146
  "eval_steps_per_second": 15.379,
147
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  }
149
  ],
150
  "logging_steps": 10,
@@ -159,7 +202,7 @@
159
  "early_stopping_threshold": 0.0
160
  },
161
  "attributes": {
162
- "early_stopping_patience_counter": 3
163
  }
164
  },
165
  "TrainerControl": {
@@ -173,7 +216,7 @@
173
  "attributes": {}
174
  }
175
  },
176
- "total_flos": 6126932459520000.0,
177
  "train_batch_size": 1,
178
  "trial_name": null,
179
  "trial_params": null
 
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.012492192379762648,
6
  "eval_steps": 50,
7
+ "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
145
  "eval_samples_per_second": 15.379,
146
  "eval_steps_per_second": 15.379,
147
  "step": 150
148
+ },
149
+ {
150
+ "epoch": 0.009993753903810118,
151
+ "grad_norm": 26.0,
152
+ "learning_rate": 0.04725712682570498,
153
+ "loss": 10.5338,
154
+ "step": 160
155
+ },
156
+ {
157
+ "epoch": 0.010618363522798251,
158
+ "grad_norm": 28.125,
159
+ "learning_rate": 0.046884695883873395,
160
+ "loss": 9.3137,
161
+ "step": 170
162
+ },
163
+ {
164
+ "epoch": 0.011242973141786383,
165
+ "grad_norm": 37.75,
166
+ "learning_rate": 0.04649022890837298,
167
+ "loss": 9.4597,
168
+ "step": 180
169
+ },
170
+ {
171
+ "epoch": 0.011867582760774516,
172
+ "grad_norm": 46.75,
173
+ "learning_rate": 0.046074123094010544,
174
+ "loss": 9.584,
175
+ "step": 190
176
+ },
177
+ {
178
+ "epoch": 0.012492192379762648,
179
+ "grad_norm": 48.25,
180
+ "learning_rate": 0.04563679742406935,
181
+ "loss": 9.6385,
182
+ "step": 200
183
+ },
184
+ {
185
+ "epoch": 0.012492192379762648,
186
+ "eval_loss": 9.662774085998535,
187
+ "eval_runtime": 53.0984,
188
+ "eval_samples_per_second": 15.876,
189
+ "eval_steps_per_second": 15.876,
190
+ "step": 200
191
  }
192
  ],
193
  "logging_steps": 10,
 
202
  "early_stopping_threshold": 0.0
203
  },
204
  "attributes": {
205
+ "early_stopping_patience_counter": 4
206
  }
207
  },
208
  "TrainerControl": {
 
216
  "attributes": {}
217
  }
218
  },
219
+ "total_flos": 8169243279360000.0,
220
  "train_batch_size": 1,
221
  "trial_name": null,
222
  "trial_params": null