Upload checkpoint checkpoint-350

Browse files

Files changed (9) hide show

checkpoint-350/README.md +9 -1
checkpoint-350/adapter_config.json +2 -0
checkpoint-350/adapter_model.safetensors +1 -1
checkpoint-350/optimizer.pt +1 -1
checkpoint-350/rng_state.pth +1 -1
checkpoint-350/scaler.pt +1 -1
checkpoint-350/scheduler.pt +1 -1
checkpoint-350/trainer_state.json +142 -142
checkpoint-350/training_args.bin +1 -1

checkpoint-350/README.md CHANGED Viewed

@@ -1,6 +1,14 @@
 ---
 base_model: unsloth/gemma-3n-e4b-it-unsloth-bnb-4bit
 library_name: peft
 ---
 # Model Card for Model ID
@@ -199,4 +207,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 [More Information Needed]
 ### Framework versions
-- PEFT 0.15.2

 ---
 base_model: unsloth/gemma-3n-e4b-it-unsloth-bnb-4bit
 library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:unsloth/gemma-3n-e4b-it-unsloth-bnb-4bit
+- lora
+- sft
+- transformers
+- trl
+- unsloth
 ---
 # Model Card for Model ID
 [More Information Needed]
 ### Framework versions
+- PEFT 0.16.0

checkpoint-350/adapter_config.json CHANGED Viewed

@@ -20,6 +20,7 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 64,
   "rank_pattern": {},
   "revision": null,
@@ -27,5 +28,6 @@
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
   "use_rslora": false
 }

   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "qalora_group_size": 16,
   "r": 64,
   "rank_pattern": {},
   "revision": null,
   "task_type": "CAUSAL_LM",
   "trainable_token_indices": null,
   "use_dora": false,
+  "use_qalora": false,
   "use_rslora": false
 }

checkpoint-350/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed34c742a062137185cf9a341f5dabb889c5d3a55d9f0d4b5418093363b2835b
 size 614801160

 version https://git-lfs.github.com/spec/v1
+oid sha256:2be9000d0113c7bb8c3fb993672ae03bd817348a83f13a0314406645048165c7
 size 614801160

checkpoint-350/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:94f5861ace1d92e7b4cec384cff6340cd128c1983cd2e2561be9bde03b902f37
 size 314017998

 version https://git-lfs.github.com/spec/v1
+oid sha256:081ca991d80a3e709679f51738d09c3c9d38c3eeb5dc78925a2887a16b522362
 size 314017998

checkpoint-350/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d7ecf15e83ac4d18e0d90f8a44821af2f304313a6ae05eeb21767226a79c463
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4ae31f3bd6abd5e088309ad57fa2e995bc6dd61c02221bc158a3d63e6ad1f06
 size 14244

checkpoint-350/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b6c30ef093fa2dd3d81a7e4ac493646dc32b312671f5091f4043a25d83c434a
 size 988

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a80ac1076ee6a590a8afc8f7c2e173aeb236819a68cd950eb3e2d5227eadb56
 size 988

checkpoint-350/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f54f61cd21e7715c2d1c98c82263f221a334d1b450170bfc3ffdd4457e178d3d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c165b4db31643b30ea8c9aef92480c1c2793ff9eb23d3b7bd85759601ce62c1c
 size 1064

checkpoint-350/trainer_state.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.1404580152671757,
   "eval_steps": 500,
   "global_step": 350,
   "is_hyper_param_search": false,
@@ -10,255 +10,255 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.061068702290076333,
       "grad_norm": NaN,
       "learning_rate": 0.0,
-      "loss": 9.249,
       "step": 10
     },
     {
-      "epoch": 0.12213740458015267,
-      "grad_norm": 4165304.25,
-      "learning_rate": 7.317073170731707e-09,
-      "loss": 9.3268,
       "step": 20
     },
     {
-      "epoch": 0.183206106870229,
-      "grad_norm": 52283.171875,
-      "learning_rate": 3.170731707317073e-08,
-      "loss": 5.2487,
       "step": 30
     },
     {
-      "epoch": 0.24427480916030533,
-      "grad_norm": 34552.171875,
-      "learning_rate": 5.609756097560976e-08,
-      "loss": 3.5598,
       "step": 40
     },
     {
-      "epoch": 0.3053435114503817,
-      "grad_norm": 26069.314453125,
-      "learning_rate": 8.048780487804878e-08,
-      "loss": 3.2428,
       "step": 50
     },
     {
-      "epoch": 0.366412213740458,
-      "grad_norm": 17445.56640625,
-      "learning_rate": 1.048780487804878e-07,
-      "loss": 3.2248,
       "step": 60
     },
     {
-      "epoch": 0.42748091603053434,
-      "grad_norm": 23311.046875,
-      "learning_rate": 1.2926829268292682e-07,
-      "loss": 3.2595,
       "step": 70
     },
     {
-      "epoch": 0.48854961832061067,
-      "grad_norm": 13832.998046875,
-      "learning_rate": 1.5365853658536586e-07,
-      "loss": 3.0941,
       "step": 80
     },
     {
-      "epoch": 0.549618320610687,
-      "grad_norm": 11702.7421875,
-      "learning_rate": 1.7804878048780488e-07,
-      "loss": 3.0741,
       "step": 90
     },
     {
-      "epoch": 0.6106870229007634,
-      "grad_norm": 11529.9228515625,
-      "learning_rate": 1.997289972899729e-07,
-      "loss": 3.163,
       "step": 100
     },
     {
-      "epoch": 0.6717557251908397,
-      "grad_norm": 9445.1396484375,
-      "learning_rate": 1.970189701897019e-07,
-      "loss": 3.0271,
       "step": 110
     },
     {
-      "epoch": 0.732824427480916,
-      "grad_norm": 9955.53515625,
-      "learning_rate": 1.9430894308943088e-07,
-      "loss": 2.9452,
       "step": 120
     },
     {
-      "epoch": 0.7938931297709924,
-      "grad_norm": 8468.791015625,
-      "learning_rate": 1.915989159891599e-07,
-      "loss": 2.9369,
       "step": 130
     },
     {
-      "epoch": 0.8549618320610687,
-      "grad_norm": 9535.78125,
-      "learning_rate": 1.8888888888888888e-07,
-      "loss": 2.8495,
       "step": 140
     },
     {
-      "epoch": 0.916030534351145,
-      "grad_norm": 6875.52783203125,
-      "learning_rate": 1.861788617886179e-07,
-      "loss": 2.9174,
       "step": 150
     },
     {
-      "epoch": 0.9770992366412213,
-      "grad_norm": 7618.35107421875,
-      "learning_rate": 1.8346883468834688e-07,
-      "loss": 2.8059,
       "step": 160
     },
     {
-      "epoch": 1.036641221374046,
-      "grad_norm": 7838.251953125,
-      "learning_rate": 1.8075880758807586e-07,
-      "loss": 2.7811,
       "step": 170
     },
     {
-      "epoch": 1.0977099236641221,
-      "grad_norm": 7657.27880859375,
-      "learning_rate": 1.7804878048780488e-07,
-      "loss": 2.7413,
       "step": 180
     },
     {
-      "epoch": 1.1587786259541986,
-      "grad_norm": 7777.9345703125,
-      "learning_rate": 1.753387533875339e-07,
-      "loss": 2.7186,
       "step": 190
     },
     {
-      "epoch": 1.2198473282442748,
-      "grad_norm": 8100.41259765625,
-      "learning_rate": 1.7262872628726285e-07,
-      "loss": 2.6789,
       "step": 200
     },
     {
-      "epoch": 1.2809160305343512,
-      "grad_norm": 6984.42822265625,
-      "learning_rate": 1.6991869918699186e-07,
-      "loss": 2.608,
       "step": 210
     },
     {
-      "epoch": 1.3419847328244274,
-      "grad_norm": 7442.26806640625,
-      "learning_rate": 1.6720867208672087e-07,
-      "loss": 2.5542,
       "step": 220
     },
     {
-      "epoch": 1.4030534351145039,
-      "grad_norm": 7562.8271484375,
-      "learning_rate": 1.6449864498644986e-07,
-      "loss": 2.7031,
       "step": 230
     },
     {
-      "epoch": 1.46412213740458,
-      "grad_norm": 7235.27392578125,
-      "learning_rate": 1.6178861788617885e-07,
-      "loss": 2.6075,
       "step": 240
     },
     {
-      "epoch": 1.5251908396946565,
-      "grad_norm": 7698.4599609375,
-      "learning_rate": 1.5907859078590786e-07,
-      "loss": 2.5737,
       "step": 250
     },
     {
-      "epoch": 1.5862595419847327,
-      "grad_norm": 6776.5927734375,
-      "learning_rate": 1.5636856368563685e-07,
-      "loss": 2.6098,
       "step": 260
     },
     {
-      "epoch": 1.6473282442748092,
-      "grad_norm": 6810.216796875,
-      "learning_rate": 1.5365853658536586e-07,
-      "loss": 2.6596,
       "step": 270
     },
     {
-      "epoch": 1.7083969465648856,
-      "grad_norm": 8227.4892578125,
-      "learning_rate": 1.5094850948509485e-07,
-      "loss": 2.5461,
       "step": 280
     },
     {
-      "epoch": 1.7694656488549618,
-      "grad_norm": 6727.93212890625,
-      "learning_rate": 1.4823848238482383e-07,
-      "loss": 2.5579,
       "step": 290
     },
     {
-      "epoch": 1.830534351145038,
-      "grad_norm": 7243.64111328125,
-      "learning_rate": 1.4552845528455284e-07,
-      "loss": 2.5538,
       "step": 300
     },
     {
-      "epoch": 1.8916030534351145,
-      "grad_norm": 6177.71240234375,
-      "learning_rate": 1.4281842818428186e-07,
-      "loss": 2.4577,
       "step": 310
     },
     {
-      "epoch": 1.952671755725191,
-      "grad_norm": 7574.3271484375,
-      "learning_rate": 1.4010840108401082e-07,
-      "loss": 2.4778,
       "step": 320
     },
     {
-      "epoch": 2.018320610687023,
-      "grad_norm": 6852.73095703125,
-      "learning_rate": 1.3739837398373983e-07,
-      "loss": 2.7076,
       "step": 330
     },
     {
-      "epoch": 2.0793893129770993,
-      "grad_norm": 6963.75927734375,
-      "learning_rate": 1.3468834688346884e-07,
-      "loss": 2.3829,
       "step": 340
     },
     {
-      "epoch": 2.1404580152671757,
-      "grad_norm": 7274.21630859375,
-      "learning_rate": 1.3197831978319783e-07,
-      "loss": 2.4186,
       "step": 350
     }
   ],
   "logging_steps": 10,
-  "max_steps": 820,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
   "save_steps": 50,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -272,7 +272,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.585680433386243e+16,
   "train_batch_size": 5,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.8324607329842932,
   "eval_steps": 500,
   "global_step": 350,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.05235602094240838,
       "grad_norm": NaN,
       "learning_rate": 0.0,
+      "loss": 9.7408,
       "step": 10
     },
     {
+      "epoch": 0.10471204188481675,
+      "grad_norm": 202209.765625,
+      "learning_rate": 3.1413612565445024e-08,
+      "loss": 9.1548,
       "step": 20
     },
     {
+      "epoch": 0.15706806282722513,
+      "grad_norm": 27189.787109375,
+      "learning_rate": 1.3612565445026178e-07,
+      "loss": 3.8451,
       "step": 30
     },
     {
+      "epoch": 0.2094240837696335,
+      "grad_norm": 10457.34765625,
+      "learning_rate": 2.4083769633507854e-07,
+      "loss": 3.267,
       "step": 40
     },
     {
+      "epoch": 0.2617801047120419,
+      "grad_norm": 8087.2939453125,
+      "learning_rate": 3.4554973821989523e-07,
+      "loss": 3.0939,
       "step": 50
     },
     {
+      "epoch": 0.31413612565445026,
+      "grad_norm": 7214.744140625,
+      "learning_rate": 4.50261780104712e-07,
+      "loss": 3.0211,
       "step": 60
     },
     {
+      "epoch": 0.36649214659685864,
+      "grad_norm": 6162.826171875,
+      "learning_rate": 5.549738219895288e-07,
+      "loss": 2.846,
       "step": 70
     },
     {
+      "epoch": 0.418848167539267,
+      "grad_norm": 4688.05615234375,
+      "learning_rate": 6.596858638743455e-07,
+      "loss": 2.8104,
       "step": 80
     },
     {
+      "epoch": 0.4712041884816754,
+      "grad_norm": 3856.7578125,
+      "learning_rate": 7.643979057591623e-07,
+      "loss": 2.8735,
       "step": 90
     },
     {
+      "epoch": 0.5235602094240838,
+      "grad_norm": 3529.413330078125,
+      "learning_rate": 8.691099476439791e-07,
+      "loss": 2.8117,
       "step": 100
     },
     {
+      "epoch": 0.5759162303664922,
+      "grad_norm": 2830.52734375,
+      "learning_rate": 9.738219895287958e-07,
+      "loss": 2.7099,
       "step": 110
     },
     {
+      "epoch": 0.6282722513089005,
+      "grad_norm": 2316.537353515625,
+      "learning_rate": 1.0785340314136124e-06,
+      "loss": 2.6387,
       "step": 120
     },
     {
+      "epoch": 0.680628272251309,
+      "grad_norm": 2685.246826171875,
+      "learning_rate": 1.1832460732984293e-06,
+      "loss": 2.6667,
       "step": 130
     },
     {
+      "epoch": 0.7329842931937173,
+      "grad_norm": 2066.593017578125,
+      "learning_rate": 1.2879581151832458e-06,
+      "loss": 2.5786,
       "step": 140
     },
     {
+      "epoch": 0.7853403141361257,
+      "grad_norm": 2110.41748046875,
+      "learning_rate": 1.3926701570680628e-06,
+      "loss": 2.4927,
       "step": 150
     },
     {
+      "epoch": 0.837696335078534,
+      "grad_norm": 1557.745849609375,
+      "learning_rate": 1.4973821989528795e-06,
+      "loss": 2.6125,
       "step": 160
     },
     {
+      "epoch": 0.8900523560209425,
+      "grad_norm": 1510.9991455078125,
+      "learning_rate": 1.6020942408376963e-06,
+      "loss": 2.5048,
       "step": 170
     },
     {
+      "epoch": 0.9424083769633508,
+      "grad_norm": 1395.5841064453125,
+      "learning_rate": 1.706806282722513e-06,
+      "loss": 2.5049,
       "step": 180
     },
     {
+      "epoch": 0.9947643979057592,
+      "grad_norm": 1400.4466552734375,
+      "learning_rate": 1.8115183246073297e-06,
+      "loss": 2.4902,
       "step": 190
     },
     {
+      "epoch": 1.0471204188481675,
+      "grad_norm": 1328.171142578125,
+      "learning_rate": 1.9162303664921463e-06,
+      "loss": 2.3063,
       "step": 200
     },
     {
+      "epoch": 1.0994764397905759,
+      "grad_norm": 1169.1490478515625,
+      "learning_rate": 1.997673065735893e-06,
+      "loss": 2.3826,
       "step": 210
     },
     {
+      "epoch": 1.1518324607329844,
+      "grad_norm": 1007.3028564453125,
+      "learning_rate": 1.9860383944153577e-06,
+      "loss": 2.2646,
       "step": 220
     },
     {
+      "epoch": 1.2041884816753927,
+      "grad_norm": 905.8086547851562,
+      "learning_rate": 1.9744037230948225e-06,
+      "loss": 2.3065,
       "step": 230
     },
     {
+      "epoch": 1.256544502617801,
+      "grad_norm": 904.2677001953125,
+      "learning_rate": 1.9627690517742874e-06,
+      "loss": 2.369,
       "step": 240
     },
     {
+      "epoch": 1.3089005235602094,
+      "grad_norm": 878.70751953125,
+      "learning_rate": 1.951134380453752e-06,
+      "loss": 2.2916,
       "step": 250
     },
     {
+      "epoch": 1.3612565445026177,
+      "grad_norm": 785.525146484375,
+      "learning_rate": 1.9394997091332166e-06,
+      "loss": 2.2916,
       "step": 260
     },
     {
+      "epoch": 1.4136125654450262,
+      "grad_norm": 715.8485107421875,
+      "learning_rate": 1.927865037812682e-06,
+      "loss": 2.247,
       "step": 270
     },
     {
+      "epoch": 1.4659685863874345,
+      "grad_norm": 742.1319580078125,
+      "learning_rate": 1.9162303664921463e-06,
+      "loss": 2.2293,
       "step": 280
     },
     {
+      "epoch": 1.518324607329843,
+      "grad_norm": 777.41259765625,
+      "learning_rate": 1.9045956951716113e-06,
+      "loss": 2.1447,
       "step": 290
     },
     {
+      "epoch": 1.5706806282722514,
+      "grad_norm": 693.8157348632812,
+      "learning_rate": 1.8929610238510761e-06,
+      "loss": 2.1851,
       "step": 300
     },
     {
+      "epoch": 1.6230366492146597,
+      "grad_norm": 707.2672119140625,
+      "learning_rate": 1.881326352530541e-06,
+      "loss": 2.1879,
       "step": 310
     },
     {
+      "epoch": 1.675392670157068,
+      "grad_norm": 727.61767578125,
+      "learning_rate": 1.8696916812100056e-06,
+      "loss": 2.1962,
       "step": 320
     },
     {
+      "epoch": 1.7277486910994764,
+      "grad_norm": 695.4833984375,
+      "learning_rate": 1.8580570098894706e-06,
+      "loss": 2.2057,
       "step": 330
     },
     {
+      "epoch": 1.7801047120418847,
+      "grad_norm": 614.199462890625,
+      "learning_rate": 1.8464223385689352e-06,
+      "loss": 2.0654,
       "step": 340
     },
     {
+      "epoch": 1.8324607329842932,
+      "grad_norm": 724.0316162109375,
+      "learning_rate": 1.8347876672484e-06,
+      "loss": 2.0803,
       "step": 350
     }
   ],
   "logging_steps": 10,
+  "max_steps": 1910,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
   "save_steps": 50,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 2.664823984891824e+16,
   "train_batch_size": 5,
   "trial_name": null,
   "trial_params": null

checkpoint-350/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:757d9ed6a271cc7dd663b202a023b2731e235bf47955a4f40ff4c18331f20ba4
 size 5816

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a98a6f231f28700315fa8e9cb612a94ae1e99d1ff2b7795e1d31ff2c428a5d2
 size 5816