KBayoud committed on
Commit
b5f99e7
·
verified ·
1 Parent(s): 5207358

Upload checkpoint-600

Browse files
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb2471ba3abe3d3fa5eab5d1ec36a5139ec3204056d368c04a27e7b764540983
3
  size 4966315264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c0eb8256abd61f22bcffbcec610d3c210e32ec6939a01419fa52e72abc831c3
3
  size 4966315264
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69ed888f60f8c8b74a774618c12d20ab334d9e71673855e95692aa9fda355ca2
3
  size 1183919744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61bb1d2426ffeffdb7b41bb9f8ad70de7b954c8d105546b657b0ecd82e90e083
3
  size 1183919744
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cadc2e24275e7bf4c4229d8b1707a21d7ae592cb695965691eb66374fcef8d92
3
  size 12300683155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da32da7f2111b60955ad152993e4e09f5ff9ef40ee4fc472e1a78a637e27a28b
3
  size 12300683155
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1255b82ec9f462fef6f8c2f4a8ee876c4b6d998a53c99546a4eddcc1aabaefc7
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:236304ae89e49aae8260113165ee63419b9b745f79120014328a7fa31ed79b42
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72544eaaaca4b1790853dfcc172732f6c7f2ccdcf51bea7b6ec949c193251661
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23756909803a9fdd0e1e0f7083cd1a77d433834eb7dafbf07ba536044bccc4fc
3
  size 1465
trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 550,
3
- "best_metric": 3.166327953338623,
4
- "best_model_checkpoint": "KBayoud/SmolLM3-3B-bs-16-lr-0.0001-ep-2-wp-0.1-gacc-32-gnm-1.0-FP16-mx-2048-v0.1/checkpoint-550",
5
- "epoch": 0.08961622868432174,
6
  "eval_steps": 50,
7
- "global_step": 550,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -481,6 +481,49 @@
481
  "eval_samples_per_second": 13.051,
482
  "eval_steps_per_second": 0.816,
483
  "step": 550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
  }
485
  ],
486
  "logging_steps": 10,
@@ -500,7 +543,7 @@
500
  "attributes": {}
501
  }
502
  },
503
- "total_flos": 9.731854941605069e+18,
504
  "train_batch_size": 16,
505
  "trial_name": null,
506
  "trial_params": null
 
1
  {
2
+ "best_global_step": 600,
3
+ "best_metric": 3.139875650405884,
4
+ "best_model_checkpoint": "KBayoud/SmolLM3-3B-bs-16-lr-0.0001-ep-2-wp-0.1-gacc-32-gnm-1.0-FP16-mx-2048-v0.1/checkpoint-600",
5
+ "epoch": 0.09776315856471463,
6
  "eval_steps": 50,
7
+ "global_step": 600,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
481
  "eval_samples_per_second": 13.051,
482
  "eval_steps_per_second": 0.816,
483
  "step": 550
484
+ },
485
+ {
486
+ "epoch": 0.09124561466040032,
487
+ "grad_norm": 2.625,
488
+ "learning_rate": 4.5521172638436485e-05,
489
+ "loss": 2.9931,
490
+ "step": 560
491
+ },
492
+ {
493
+ "epoch": 0.0928750006364789,
494
+ "grad_norm": 2.859375,
495
+ "learning_rate": 4.633550488599348e-05,
496
+ "loss": 3.0269,
497
+ "step": 570
498
+ },
499
+ {
500
+ "epoch": 0.09450438661255747,
501
+ "grad_norm": 2.53125,
502
+ "learning_rate": 4.714983713355049e-05,
503
+ "loss": 3.0161,
504
+ "step": 580
505
+ },
506
+ {
507
+ "epoch": 0.09613377258863605,
508
+ "grad_norm": 2.453125,
509
+ "learning_rate": 4.796416938110749e-05,
510
+ "loss": 2.9851,
511
+ "step": 590
512
+ },
513
+ {
514
+ "epoch": 0.09776315856471463,
515
+ "grad_norm": 2.6875,
516
+ "learning_rate": 4.8778501628664496e-05,
517
+ "loss": 2.9918,
518
+ "step": 600
519
+ },
520
+ {
521
+ "epoch": 0.09776315856471463,
522
+ "eval_loss": 3.139875650405884,
523
+ "eval_runtime": 59897.1239,
524
+ "eval_samples_per_second": 13.115,
525
+ "eval_steps_per_second": 0.82,
526
+ "step": 600
527
  }
528
  ],
529
  "logging_steps": 10,
 
543
  "attributes": {}
544
  }
545
  },
546
+ "total_flos": 1.061656902720553e+19,
547
  "train_batch_size": 16,
548
  "trial_name": null,
549
  "trial_params": null