abdo-Mansour commited on
Commit
e106873
·
verified ·
1 Parent(s): 811cab5

Training in progress, step 800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bbedb7a31d95d8d1b2490b8b28c441ca323e99c258d1384164387363cfdac1a
3
  size 161533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba09a28dfe6c190885d36a748d6d4f606001c428e0ca75dc33ab0d4e2d44b997
3
  size 161533160
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1add654e0ea460afcfd9053beb5eaeb1ce2bc3f9e227ab20eaa85fbc9c38c987
3
  size 323298107
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fe0ea1fb40a0dbff711bf22868ea28098368008bf45c321edc6170d0ca527c7
3
  size 323298107
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67f523d06e9b3de29a9502a254755eca5e74e9ceb5a4017f452eb1406d0f0e41
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2632fce5aa2eaa97af9fae3ad958ab987fb27ed2b126487356b218293af2592e
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:746b3116a1b87bb2926b4d95142b6e208878ab769efed0bb65ee08710fa6901d
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e2ce1fb1d65981d4c19d899ab47cdabeff74c74db29571a6c3208a13f2fe09f
3
  size 14917
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:820ac3603f892e5dcd7b46df919a32995e52534ba0c4399503f0836c348452fc
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f763907f0cbf59aa161087688f1a78f56f71e53e8dfeea7ff2de48b9be6de630
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69e8a4610861bc7ffbbca01ffefc787d1ef5581d77904a0eb29981d8a999b0dd
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6e8826d343e1d8f8985fc7998cb7ad286f4d69b1f5a3396d943985a019b6928
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 750,
3
- "best_metric": 0.28498101234436035,
4
- "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-750",
5
- "epoch": 1.769321533923304,
6
  "eval_steps": 50,
7
- "global_step": 750,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -653,6 +653,49 @@
653
  "eval_samples_per_second": 3.138,
654
  "eval_steps_per_second": 0.785,
655
  "step": 750
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
656
  }
657
  ],
658
  "logging_steps": 10,
@@ -672,7 +715,7 @@
672
  "attributes": {}
673
  }
674
  },
675
- "total_flos": 1.7992304177656627e+17,
676
  "train_batch_size": 2,
677
  "trial_name": null,
678
  "trial_params": null
 
1
  {
2
+ "best_global_step": 800,
3
+ "best_metric": 0.2848590016365051,
4
+ "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-800",
5
+ "epoch": 1.887315634218289,
6
  "eval_steps": 50,
7
+ "global_step": 800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
653
  "eval_samples_per_second": 3.138,
654
  "eval_steps_per_second": 0.785,
655
  "step": 750
656
+ },
657
+ {
658
+ "epoch": 1.7929203539823009,
659
+ "grad_norm": 0.9809938669204712,
660
+ "learning_rate": 5.643573569362806e-07,
661
+ "loss": 0.1819,
662
+ "step": 760
663
+ },
664
+ {
665
+ "epoch": 1.8165191740412978,
666
+ "grad_norm": 3.2049953937530518,
667
+ "learning_rate": 4.4571905531273924e-07,
668
+ "loss": 0.2087,
669
+ "step": 770
670
+ },
671
+ {
672
+ "epoch": 1.840117994100295,
673
+ "grad_norm": 1.921066403388977,
674
+ "learning_rate": 3.4073337635982153e-07,
675
+ "loss": 0.2,
676
+ "step": 780
677
+ },
678
+ {
679
+ "epoch": 1.8637168141592921,
680
+ "grad_norm": 1.9557863473892212,
681
+ "learning_rate": 2.495782788388865e-07,
682
+ "loss": 0.1669,
683
+ "step": 790
684
+ },
685
+ {
686
+ "epoch": 1.887315634218289,
687
+ "grad_norm": 3.648777484893799,
688
+ "learning_rate": 1.7240827761718658e-07,
689
+ "loss": 0.1845,
690
+ "step": 800
691
+ },
692
+ {
693
+ "epoch": 1.887315634218289,
694
+ "eval_loss": 0.2848590016365051,
695
+ "eval_runtime": 376.2036,
696
+ "eval_samples_per_second": 3.137,
697
+ "eval_steps_per_second": 0.784,
698
+ "step": 800
699
  }
700
  ],
701
  "logging_steps": 10,
 
715
  "attributes": {}
716
  }
717
  },
718
+ "total_flos": 1.921058365023191e+17,
719
  "train_batch_size": 2,
720
  "trial_name": null,
721
  "trial_params": null