shorecode committed on
Commit
9216405
·
verified ·
1 Parent(s): 07268ec

Upload folder using huggingface_hub

Browse files
checkpoint-latest/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef513511cab07aae2229c81db3abdc654a0b3295a3c2d11cbd3d6499af06ee82
3
  size 62293080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f75126c2a34822f1bd2758385a8fd1ca1669f4dff9de1317e4f5b25887d0a0d
3
  size 62293080
checkpoint-latest/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28fc9a9bbe9b73a2b142749dd94f1bfc0455d15809207712e61bb52c170e192c
3
  size 124642443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b94ba484a1c3a45fc8e9c69eeda0cff023c2452e3377a4ee8a9ae36f58df97f
3
  size 124642443
checkpoint-latest/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9ab924d1dd6f7d0d0a987395af92fe2c207d68f4ed0046e34205bf59aa0a3e7
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b7133fcea647449b2caaa65223ebbc0c180189bbd59dbe842634047836d81e2
3
  size 14645
checkpoint-latest/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09920373f9c73ff341f9b497a1508313129ce162304c3799289a6c23932f7385
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e26664050c6d90fb565d76db26661576aa404ce53418da0b68344264e2ee7e47
3
  size 1383
checkpoint-latest/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:733b6a45fd8cebd94fb50b1fb141fd687689caaa2ac76b306c6159a539b547e5
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac8a8c1f0ca136b116df977b16704d013fad49cab357d7231675ec3945e85ad2
3
  size 1465
checkpoint-latest/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.35192679922576103,
6
  "eval_steps": 500,
7
- "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -152,6 +152,78 @@
152
  "eval_samples_per_second": 167.039,
153
  "eval_steps_per_second": 6.427,
154
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  }
156
  ],
157
  "logging_steps": 125,
@@ -171,7 +243,7 @@
171
  "attributes": {}
172
  }
173
  },
174
- "total_flos": 1173425750016000.0,
175
  "train_batch_size": 26,
176
  "trial_name": null,
177
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5278901988386415,
6
  "eval_steps": 500,
7
+ "global_step": 3000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
152
  "eval_samples_per_second": 167.039,
153
  "eval_steps_per_second": 6.427,
154
  "step": 2000
155
+ },
156
+ {
157
+ "epoch": 0.3739222241773711,
158
+ "grad_norm": 0.7473997473716736,
159
+ "learning_rate": 0.00026262537392222413,
160
+ "loss": 3.072,
161
+ "step": 2125
162
+ },
163
+ {
164
+ "epoch": 0.3959176491289812,
165
+ "grad_norm": 0.8357605338096619,
166
+ "learning_rate": 0.00026042583142706316,
167
+ "loss": 3.073,
168
+ "step": 2250
169
+ },
170
+ {
171
+ "epoch": 0.41791307408059125,
172
+ "grad_norm": 0.6772239804267883,
173
+ "learning_rate": 0.00025822628893190213,
174
+ "loss": 3.0631,
175
+ "step": 2375
176
+ },
177
+ {
178
+ "epoch": 0.4399084990322013,
179
+ "grad_norm": 0.8163031339645386,
180
+ "learning_rate": 0.00025602674643674116,
181
+ "loss": 3.0659,
182
+ "step": 2500
183
+ },
184
+ {
185
+ "epoch": 0.4399084990322013,
186
+ "eval_loss": 2.623256206512451,
187
+ "eval_runtime": 59.1416,
188
+ "eval_samples_per_second": 166.55,
189
+ "eval_steps_per_second": 6.408,
190
+ "step": 2500
191
+ },
192
+ {
193
+ "epoch": 0.46190392398381136,
194
+ "grad_norm": 0.7648818492889404,
195
+ "learning_rate": 0.00025382720394158013,
196
+ "loss": 3.0765,
197
+ "step": 2625
198
+ },
199
+ {
200
+ "epoch": 0.4838993489354214,
201
+ "grad_norm": 0.8686987161636353,
202
+ "learning_rate": 0.0002516276614464191,
203
+ "loss": 3.075,
204
+ "step": 2750
205
+ },
206
+ {
207
+ "epoch": 0.5058947738870315,
208
+ "grad_norm": 0.721097469329834,
209
+ "learning_rate": 0.00024942811895125813,
210
+ "loss": 3.1497,
211
+ "step": 2875
212
+ },
213
+ {
214
+ "epoch": 0.5278901988386415,
215
+ "grad_norm": 0.0,
216
+ "learning_rate": 0.0002472285764560971,
217
+ "loss": 3.9227,
218
+ "step": 3000
219
+ },
220
+ {
221
+ "epoch": 0.5278901988386415,
222
+ "eval_loss": 3.1224141120910645,
223
+ "eval_runtime": 59.1848,
224
+ "eval_samples_per_second": 166.428,
225
+ "eval_steps_per_second": 6.404,
226
+ "step": 3000
227
  }
228
  ],
229
  "logging_steps": 125,
 
243
  "attributes": {}
244
  }
245
  },
246
+ "total_flos": 1760138625024000.0,
247
  "train_batch_size": 26,
248
  "trial_name": null,
249
  "trial_params": null