LamaDiab committed on
Commit
6b00e38
·
verified ·
1 Parent(s): b93bbd1

Training in progress, epoch 6, checkpoint

Browse files
checkpoint-21510/README.md CHANGED
@@ -366,22 +366,7 @@ You can finetune this model on your own dataset.
366
  ### Training Logs
367
  | Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
368
  |:------:|:-----:|:-------------:|:---------------:|:---------------:|
369
- | 0.0003 | 1 | 1.6763 | - | - |
370
- | 0.2789 | 1000 | 1.6731 | 1.2452 | 0.9435 |
371
- | 0.5579 | 2000 | 1.4545 | 1.1861 | 0.9491 |
372
- | 0.8368 | 3000 | 1.3514 | 1.1498 | 0.9530 |
373
- | 1.1158 | 4000 | 1.2499 | 1.1096 | 0.9535 |
374
- | 1.3947 | 5000 | 1.1977 | 1.0828 | 0.9589 |
375
- | 1.6736 | 6000 | 1.1468 | 1.0728 | 0.9585 |
376
- | 1.9526 | 7000 | 1.1011 | 1.0473 | 0.9593 |
377
- | 2.2315 | 8000 | 1.0401 | 1.0429 | 0.9575 |
378
- | 2.5105 | 9000 | 1.0196 | 1.0377 | 0.9599 |
379
- | 2.7894 | 10000 | 0.9939 | 1.0395 | 0.9592 |
380
- | 3.0683 | 11000 | 0.9579 | 1.0249 | 0.9612 |
381
- | 3.3473 | 12000 | 0.9437 | 1.0262 | 0.9600 |
382
- | 3.6262 | 13000 | 0.9279 | 1.0129 | 0.9618 |
383
- | 3.9052 | 14000 | 0.9184 | 1.0108 | 0.9621 |
384
- | 4.1841 | 15000 | 0.8807 | 1.0071 | 0.9614 |
385
  | 4.4630 | 16000 | 0.8802 | 1.0004 | 0.9613 |
386
  | 4.7420 | 17000 | 0.8752 | 1.0061 | 0.9617 |
387
  | 5.0209 | 18000 | 0.8628 | 1.0004 | 0.9629 |
 
366
  ### Training Logs
367
  | Epoch | Step | Training Loss | Validation Loss | cosine_accuracy |
368
  |:------:|:-----:|:-------------:|:---------------:|:---------------:|
369
+ | 4.1841 | 15000 | 0.8884 | 1.0071 | 0.9614 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  | 4.4630 | 16000 | 0.8802 | 1.0004 | 0.9613 |
371
  | 4.7420 | 17000 | 0.8752 | 1.0061 | 0.9617 |
372
  | 5.0209 | 18000 | 0.8628 | 1.0004 | 0.9629 |
checkpoint-21510/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77a8ca702070da5d882996fab55e9323da9eb35629d63dc8b914ab691d8584fc
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1219bd9031f06886018f52413a10fcb6ac47375db53e4b9ff515c0e7adf9966
3
  size 14244
checkpoint-21510/trainer_state.json CHANGED
@@ -244,16 +244,16 @@
244
  "epoch": 4.184100418410042,
245
  "grad_norm": 4.519611358642578,
246
  "learning_rate": 5.964002440512509e-06,
247
- "loss": 0.8807,
248
  "step": 15000
249
  },
250
  {
251
  "epoch": 4.184100418410042,
252
  "eval_cosine_accuracy": 0.9614049792289734,
253
  "eval_loss": 1.0071080923080444,
254
- "eval_runtime": 22.3262,
255
- "eval_samples_per_second": 425.913,
256
- "eval_steps_per_second": 3.359,
257
  "step": 15000
258
  },
259
  {
@@ -267,9 +267,9 @@
267
  "epoch": 4.463040446304045,
268
  "eval_cosine_accuracy": 0.9612998366355896,
269
  "eval_loss": 1.0004464387893677,
270
- "eval_runtime": 23.5229,
271
- "eval_samples_per_second": 404.245,
272
- "eval_steps_per_second": 3.188,
273
  "step": 16000
274
  },
275
  {
@@ -283,9 +283,9 @@
283
  "epoch": 4.741980474198048,
284
  "eval_cosine_accuracy": 0.9617204666137695,
285
  "eval_loss": 1.0060843229293823,
286
- "eval_runtime": 22.4412,
287
- "eval_samples_per_second": 423.73,
288
- "eval_steps_per_second": 3.342,
289
  "step": 17000
290
  },
291
  {
@@ -299,9 +299,9 @@
299
  "epoch": 5.02092050209205,
300
  "eval_cosine_accuracy": 0.9628772735595703,
301
  "eval_loss": 1.0004209280014038,
302
- "eval_runtime": 22.3957,
303
- "eval_samples_per_second": 424.591,
304
- "eval_steps_per_second": 3.349,
305
  "step": 18000
306
  },
307
  {
@@ -315,9 +315,9 @@
315
  "epoch": 5.299860529986053,
316
  "eval_cosine_accuracy": 0.9621411561965942,
317
  "eval_loss": 1.0009299516677856,
318
- "eval_runtime": 22.4826,
319
- "eval_samples_per_second": 422.95,
320
- "eval_steps_per_second": 3.336,
321
  "step": 19000
322
  },
323
  {
@@ -331,9 +331,9 @@
331
  "epoch": 5.578800557880056,
332
  "eval_cosine_accuracy": 0.9630876183509827,
333
  "eval_loss": 0.9901958703994751,
334
- "eval_runtime": 22.677,
335
- "eval_samples_per_second": 419.323,
336
- "eval_steps_per_second": 3.307,
337
  "step": 20000
338
  },
339
  {
@@ -347,9 +347,9 @@
347
  "epoch": 5.857740585774058,
348
  "eval_cosine_accuracy": 0.9632979035377502,
349
  "eval_loss": 0.992326557636261,
350
- "eval_runtime": 22.6971,
351
- "eval_samples_per_second": 418.952,
352
- "eval_steps_per_second": 3.304,
353
  "step": 21000
354
  }
355
  ],
 
244
  "epoch": 4.184100418410042,
245
  "grad_norm": 4.519611358642578,
246
  "learning_rate": 5.964002440512509e-06,
247
+ "loss": 0.8884,
248
  "step": 15000
249
  },
250
  {
251
  "epoch": 4.184100418410042,
252
  "eval_cosine_accuracy": 0.9614049792289734,
253
  "eval_loss": 1.0071080923080444,
254
+ "eval_runtime": 21.2593,
255
+ "eval_samples_per_second": 447.287,
256
+ "eval_steps_per_second": 3.528,
257
  "step": 15000
258
  },
259
  {
 
267
  "epoch": 4.463040446304045,
268
  "eval_cosine_accuracy": 0.9612998366355896,
269
  "eval_loss": 1.0004464387893677,
270
+ "eval_runtime": 21.271,
271
+ "eval_samples_per_second": 447.04,
272
+ "eval_steps_per_second": 3.526,
273
  "step": 16000
274
  },
275
  {
 
283
  "epoch": 4.741980474198048,
284
  "eval_cosine_accuracy": 0.9617204666137695,
285
  "eval_loss": 1.0060843229293823,
286
+ "eval_runtime": 21.4196,
287
+ "eval_samples_per_second": 443.939,
288
+ "eval_steps_per_second": 3.501,
289
  "step": 17000
290
  },
291
  {
 
299
  "epoch": 5.02092050209205,
300
  "eval_cosine_accuracy": 0.9628772735595703,
301
  "eval_loss": 1.0004209280014038,
302
+ "eval_runtime": 21.3936,
303
+ "eval_samples_per_second": 444.479,
304
+ "eval_steps_per_second": 3.506,
305
  "step": 18000
306
  },
307
  {
 
315
  "epoch": 5.299860529986053,
316
  "eval_cosine_accuracy": 0.9621411561965942,
317
  "eval_loss": 1.0009299516677856,
318
+ "eval_runtime": 21.2856,
319
+ "eval_samples_per_second": 446.733,
320
+ "eval_steps_per_second": 3.524,
321
  "step": 19000
322
  },
323
  {
 
331
  "epoch": 5.578800557880056,
332
  "eval_cosine_accuracy": 0.9630876183509827,
333
  "eval_loss": 0.9901958703994751,
334
+ "eval_runtime": 22.6062,
335
+ "eval_samples_per_second": 420.636,
336
+ "eval_steps_per_second": 3.318,
337
  "step": 20000
338
  },
339
  {
 
347
  "epoch": 5.857740585774058,
348
  "eval_cosine_accuracy": 0.9632979035377502,
349
  "eval_loss": 0.992326557636261,
350
+ "eval_runtime": 21.2926,
351
+ "eval_samples_per_second": 446.587,
352
+ "eval_steps_per_second": 3.522,
353
  "step": 21000
354
  }
355
  ],
checkpoint-21510/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddb1e0dab31f45e0e46ba20c97f0f69d3bc5247ba9f25051c28fed605796b0eb
3
  size 5752
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93b1607a15af70cf6b32a232aa87f4ee1d3038e5a70922f63dc141b69792d409
3
  size 5752