shorecode commited on
Commit
eca16c0
·
verified ·
1 Parent(s): f0ff607

Upload folder using huggingface_hub

Browse files
checkpoint-latest/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d32a18b0785d783b8f7ca68824fdec9d9a5c5f53e5ad57b00c7655d6e20ec21a
3
  size 62293080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fd29ac343b2e0582fb8a887937c055cdcc418a58a57805fd2e8f97c4a9b5902
3
  size 62293080
checkpoint-latest/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26f5f20bc83ebedde1ae0dac318d82eccf4ac037d79e876f524681ea47ac2bd3
3
  size 124642443
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f959a9f93fe8c29de8bad80e922cd938a2f1ded943f2da2478b6fa5c4adfd70
3
  size 124642443
checkpoint-latest/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4b34564e8f5056ecea4875b26da43b7e79461939fc193402ad477fba74d3da7
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:019869f04ad9e65816198cf9bbc88e01065d43c5ae26d3c4dfa75569351f838e
3
  size 14645
checkpoint-latest/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af2a155e857e9182a60853ae072465deebfe720a1b7475fedfc54c31656178bc
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a377768ec214c80211f84cf49de0b3a1987fbbc2efbe16a70dc0f558f050f0a7
3
  size 1465
checkpoint-latest/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8798169980644026,
6
  "eval_steps": 500,
7
- "global_step": 5000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -368,6 +368,78 @@
368
  "eval_samples_per_second": 108.89,
369
  "eval_steps_per_second": 4.19,
370
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  }
372
  ],
373
  "logging_steps": 125,
@@ -387,7 +459,7 @@
387
  "attributes": {}
388
  }
389
  },
390
- "total_flos": 2933564375040000.0,
391
  "train_batch_size": 26,
392
  "trial_name": null,
393
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.055780397677283,
6
  "eval_steps": 500,
7
+ "global_step": 6000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
368
  "eval_samples_per_second": 108.89,
369
  "eval_steps_per_second": 4.19,
370
  "step": 5000
371
+ },
372
+ {
373
+ "epoch": 0.9018124230160127,
374
+ "grad_norm": 0.6645826101303101,
375
+ "learning_rate": 0.00020983635403836,
376
+ "loss": 3.1946,
377
+ "step": 5125
378
+ },
379
+ {
380
+ "epoch": 0.9238078479676227,
381
+ "grad_norm": 0.7062675952911377,
382
+ "learning_rate": 0.00020763681154319902,
383
+ "loss": 3.2228,
384
+ "step": 5250
385
+ },
386
+ {
387
+ "epoch": 0.9458032729192328,
388
+ "grad_norm": 0.7374680638313293,
389
+ "learning_rate": 0.000205437269048038,
390
+ "loss": 3.1905,
391
+ "step": 5375
392
+ },
393
+ {
394
+ "epoch": 0.9677986978708428,
395
+ "grad_norm": 0.853108286857605,
396
+ "learning_rate": 0.000203237726552877,
397
+ "loss": 3.2011,
398
+ "step": 5500
399
+ },
400
+ {
401
+ "epoch": 0.9677986978708428,
402
+ "eval_loss": 2.714653491973877,
403
+ "eval_runtime": 90.854,
404
+ "eval_samples_per_second": 108.416,
405
+ "eval_steps_per_second": 4.172,
406
+ "step": 5500
407
+ },
408
+ {
409
+ "epoch": 0.9897941228224529,
410
+ "grad_norm": 0.6738927364349365,
411
+ "learning_rate": 0.000201038184057716,
412
+ "loss": 3.1888,
413
+ "step": 5625
414
+ },
415
+ {
416
+ "epoch": 1.011789547774063,
417
+ "grad_norm": 0.7420991659164429,
418
+ "learning_rate": 0.00019883864156255496,
419
+ "loss": 3.1835,
420
+ "step": 5750
421
+ },
422
+ {
423
+ "epoch": 1.0337849727256732,
424
+ "grad_norm": 0.8556201457977295,
425
+ "learning_rate": 0.00019663909906739396,
426
+ "loss": 3.1885,
427
+ "step": 5875
428
+ },
429
+ {
430
+ "epoch": 1.055780397677283,
431
+ "grad_norm": 0.7332561016082764,
432
+ "learning_rate": 0.00019443955657223294,
433
+ "loss": 3.1691,
434
+ "step": 6000
435
+ },
436
+ {
437
+ "epoch": 1.055780397677283,
438
+ "eval_loss": 2.692098617553711,
439
+ "eval_runtime": 90.3724,
440
+ "eval_samples_per_second": 108.993,
441
+ "eval_steps_per_second": 4.194,
442
+ "step": 6000
443
  }
444
  ],
445
  "logging_steps": 125,
 
459
  "attributes": {}
460
  }
461
  },
462
+ "total_flos": 3520096723009536.0,
463
  "train_batch_size": 26,
464
  "trial_name": null,
465
  "trial_params": null