shorecode commited on
Commit
096836f
·
verified ·
1 Parent(s): 86cc00b

Upload folder using huggingface_hub

Browse files
checkpoint-latest/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca52a8b3016d203a936beddbcf117be824418aa83105a5b7c66cb6ee5e03d904
3
  size 62293080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:987cdddaddd6ee9efcbf0406b5dbea003e19c76523f179ea3826ca2f5707f3cd
3
  size 62293080
checkpoint-latest/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3caa7486f45749552621cbfc1449a5f803a04e5ca4e0bb6a48abed87b6ae7f9c
3
  size 124642443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4ea40a6da395d8e9289cbe3e64187c17945564d3b3cb832aeaf878ad5504ff8
3
  size 124642443
checkpoint-latest/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9ab924d1dd6f7d0d0a987395af92fe2c207d68f4ed0046e34205bf59aa0a3e7
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b7133fcea647449b2caaa65223ebbc0c180189bbd59dbe842634047836d81e2
3
  size 14645
checkpoint-latest/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:733b6a45fd8cebd94fb50b1fb141fd687689caaa2ac76b306c6159a539b547e5
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac8a8c1f0ca136b116df977b16704d013fad49cab357d7231675ec3945e85ad2
3
  size 1465
checkpoint-latest/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.35192679922576103,
6
  "eval_steps": 500,
7
- "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -152,6 +152,78 @@
152
  "eval_samples_per_second": 107.876,
153
  "eval_steps_per_second": 4.151,
154
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  }
156
  ],
157
  "logging_steps": 125,
@@ -171,7 +243,7 @@
171
  "attributes": {}
172
  }
173
  },
174
- "total_flos": 1173425750016000.0,
175
  "train_batch_size": 26,
176
  "trial_name": null,
177
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5278901988386415,
6
  "eval_steps": 500,
7
+ "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
152
  "eval_samples_per_second": 107.876,
153
  "eval_steps_per_second": 4.151,
154
  "step": 2000
155
+ },
156
+ {
157
+ "epoch": 0.3739222241773711,
158
+ "grad_norm": 0.6693059802055359,
159
+ "learning_rate": 0.00026262537392222413,
160
+ "loss": 3.3749,
161
+ "step": 2125
162
+ },
163
+ {
164
+ "epoch": 0.3959176491289812,
165
+ "grad_norm": 0.8750757575035095,
166
+ "learning_rate": 0.00026042583142706316,
167
+ "loss": 3.3698,
168
+ "step": 2250
169
+ },
170
+ {
171
+ "epoch": 0.41791307408059125,
172
+ "grad_norm": 0.8450427651405334,
173
+ "learning_rate": 0.00025822628893190213,
174
+ "loss": 3.3556,
175
+ "step": 2375
176
+ },
177
+ {
178
+ "epoch": 0.4399084990322013,
179
+ "grad_norm": 0.660953938961029,
180
+ "learning_rate": 0.00025602674643674116,
181
+ "loss": 3.3503,
182
+ "step": 2500
183
+ },
184
+ {
185
+ "epoch": 0.4399084990322013,
186
+ "eval_loss": 2.8230228424072266,
187
+ "eval_runtime": 91.2675,
188
+ "eval_samples_per_second": 107.924,
189
+ "eval_steps_per_second": 4.153,
190
+ "step": 2500
191
+ },
192
+ {
193
+ "epoch": 0.46190392398381136,
194
+ "grad_norm": 0.7277446389198303,
195
+ "learning_rate": 0.00025382720394158013,
196
+ "loss": 3.358,
197
+ "step": 2625
198
+ },
199
+ {
200
+ "epoch": 0.4838993489354214,
201
+ "grad_norm": 0.7908076047897339,
202
+ "learning_rate": 0.0002516276614464191,
203
+ "loss": 3.3433,
204
+ "step": 2750
205
+ },
206
+ {
207
+ "epoch": 0.5058947738870315,
208
+ "grad_norm": 0.847932755947113,
209
+ "learning_rate": 0.00024942811895125813,
210
+ "loss": 3.3363,
211
+ "step": 2875
212
+ },
213
+ {
214
+ "epoch": 0.5278901988386415,
215
+ "grad_norm": 0.7486276626586914,
216
+ "learning_rate": 0.0002472285764560971,
217
+ "loss": 3.3197,
218
+ "step": 3000
219
+ },
220
+ {
221
+ "epoch": 0.5278901988386415,
222
+ "eval_loss": 2.7938477993011475,
223
+ "eval_runtime": 91.2355,
224
+ "eval_samples_per_second": 107.962,
225
+ "eval_steps_per_second": 4.154,
226
+ "step": 3000
227
  }
228
  ],
229
  "logging_steps": 125,
 
243
  "attributes": {}
244
  }
245
  },
246
+ "total_flos": 1760138625024000.0,
247
  "train_batch_size": 26,
248
  "trial_name": null,
249
  "trial_params": null