error577 committed on
Commit
17e32e7
·
verified ·
1 Parent(s): 20aa30e

Training in progress, step 40, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d94fa881b998ac1accb6c412a65b1baaf6300bd8612b2bc5d1f31d5fea56d64
3
  size 40036040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b85235b9fd06fc2b7556026c9d400735ea1cf6ceb01413094a4e13a4f220409c
3
  size 40036040
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fffc8a41db92b8618dd62624a488da399f904230b3289a134921e61bf6a155a
3
  size 20814996
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd89e21dd25e4404711c18b21837d9f7f514a8add2433a4536b637bc67095f19
3
  size 20814996
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd4900ec0ebdc120a0be2d88bb820715c1e79c97114ea1a2c3e152e6ed02a60e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cc25fc0d4a476d263c6b1dbecc0b805055d4f792509dbda83e19cdd50420ee0
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db8e23e4b636e6d4cfbe5cdf1b076811f8e1f9d4c97603e891aee3c95c35c66b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a75dcc5ec09eada6641b366eac390a2a47e7ec4306b94cfdb718bc9a73ac9b0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0007137058283002203,
5
  "eval_steps": 20,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -163,6 +163,154 @@
163
  "eval_samples_per_second": 11.493,
164
  "eval_steps_per_second": 11.493,
165
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  }
167
  ],
168
  "logging_steps": 1,
@@ -182,7 +330,7 @@
182
  "attributes": {}
183
  }
184
  },
185
- "total_flos": 537780794425344.0,
186
  "train_batch_size": 1,
187
  "trial_name": null,
188
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0014274116566004406,
5
  "eval_steps": 20,
6
+ "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
163
  "eval_samples_per_second": 11.493,
164
  "eval_steps_per_second": 11.493,
165
  "step": 20
166
+ },
167
+ {
168
+ "epoch": 0.0007493911197152314,
169
+ "grad_norm": 3.009274482727051,
170
+ "learning_rate": 0.00029893625775634835,
171
+ "loss": 3.1292,
172
+ "step": 21
173
+ },
174
+ {
175
+ "epoch": 0.0007850764111302424,
176
+ "grad_norm": 4.148017406463623,
177
+ "learning_rate": 0.0002987343436093454,
178
+ "loss": 3.0939,
179
+ "step": 22
180
+ },
181
+ {
182
+ "epoch": 0.0008207617025452535,
183
+ "grad_norm": 2.899528980255127,
184
+ "learning_rate": 0.00029851497482766547,
185
+ "loss": 2.8648,
186
+ "step": 23
187
+ },
188
+ {
189
+ "epoch": 0.0008564469939602644,
190
+ "grad_norm": 3.6243538856506348,
191
+ "learning_rate": 0.00029827817715520773,
192
+ "loss": 3.1808,
193
+ "step": 24
194
+ },
195
+ {
196
+ "epoch": 0.0008921322853752754,
197
+ "grad_norm": 4.4735894203186035,
198
+ "learning_rate": 0.0002980239783812289,
199
+ "loss": 2.9876,
200
+ "step": 25
201
+ },
202
+ {
203
+ "epoch": 0.0009278175767902865,
204
+ "grad_norm": 3.8239352703094482,
205
+ "learning_rate": 0.0002977524083370822,
206
+ "loss": 3.6976,
207
+ "step": 26
208
+ },
209
+ {
210
+ "epoch": 0.0009635028682052975,
211
+ "grad_norm": 3.177990436553955,
212
+ "learning_rate": 0.00029746349889271645,
213
+ "loss": 2.5572,
214
+ "step": 27
215
+ },
216
+ {
217
+ "epoch": 0.0009991881596203085,
218
+ "grad_norm": 2.7317492961883545,
219
+ "learning_rate": 0.0002971572839529358,
220
+ "loss": 2.6316,
221
+ "step": 28
222
+ },
223
+ {
224
+ "epoch": 0.0010348734510353195,
225
+ "grad_norm": 4.169151306152344,
226
+ "learning_rate": 0.00029683379945342125,
227
+ "loss": 3.3164,
228
+ "step": 29
229
+ },
230
+ {
231
+ "epoch": 0.0010705587424503305,
232
+ "grad_norm": 4.244836807250977,
233
+ "learning_rate": 0.000296493083356513,
234
+ "loss": 2.8311,
235
+ "step": 30
236
+ },
237
+ {
238
+ "epoch": 0.0011062440338653415,
239
+ "grad_norm": 3.5411789417266846,
240
+ "learning_rate": 0.00029613517564675565,
241
+ "loss": 2.5447,
242
+ "step": 31
243
+ },
244
+ {
245
+ "epoch": 0.0011419293252803526,
246
+ "grad_norm": 3.9989023208618164,
247
+ "learning_rate": 0.0002957601183262058,
248
+ "loss": 2.6177,
249
+ "step": 32
250
+ },
251
+ {
252
+ "epoch": 0.0011776146166953636,
253
+ "grad_norm": 3.9558351039886475,
254
+ "learning_rate": 0.000295367955409503,
255
+ "loss": 2.5028,
256
+ "step": 33
257
+ },
258
+ {
259
+ "epoch": 0.0012132999081103746,
260
+ "grad_norm": 4.089743614196777,
261
+ "learning_rate": 0.00029495873291870436,
262
+ "loss": 3.1483,
263
+ "step": 34
264
+ },
265
+ {
266
+ "epoch": 0.0012489851995253856,
267
+ "grad_norm": 4.250207901000977,
268
+ "learning_rate": 0.0002945324988778834,
269
+ "loss": 2.8495,
270
+ "step": 35
271
+ },
272
+ {
273
+ "epoch": 0.0012846704909403965,
274
+ "grad_norm": 5.241243839263916,
275
+ "learning_rate": 0.00029408930330749477,
276
+ "loss": 3.318,
277
+ "step": 36
278
+ },
279
+ {
280
+ "epoch": 0.0013203557823554077,
281
+ "grad_norm": 3.5582404136657715,
282
+ "learning_rate": 0.0002936291982185036,
283
+ "loss": 2.5436,
284
+ "step": 37
285
+ },
286
+ {
287
+ "epoch": 0.0013560410737704187,
288
+ "grad_norm": 4.2042622566223145,
289
+ "learning_rate": 0.00029315223760628217,
290
+ "loss": 2.849,
291
+ "step": 38
292
+ },
293
+ {
294
+ "epoch": 0.0013917263651854297,
295
+ "grad_norm": 3.4244472980499268,
296
+ "learning_rate": 0.00029265847744427303,
297
+ "loss": 2.2284,
298
+ "step": 39
299
+ },
300
+ {
301
+ "epoch": 0.0014274116566004406,
302
+ "grad_norm": 5.198617458343506,
303
+ "learning_rate": 0.00029214797567742035,
304
+ "loss": 2.3866,
305
+ "step": 40
306
+ },
307
+ {
308
+ "epoch": 0.0014274116566004406,
309
+ "eval_loss": 2.7816474437713623,
310
+ "eval_runtime": 199.5889,
311
+ "eval_samples_per_second": 11.464,
312
+ "eval_steps_per_second": 11.464,
313
+ "step": 40
314
  }
315
  ],
316
  "logging_steps": 1,
 
330
  "attributes": {}
331
  }
332
  },
333
+ "total_flos": 769818193035264.0,
334
  "train_batch_size": 1,
335
  "trial_name": null,
336
  "trial_params": null