Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

adapter_config.json +4 -4
adapter_model.safetensors +1 -1
optimizer.pt +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +81 -18
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -25,13 +25,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "gate_proj",
-    "down_proj",
     "o_proj",
-    "up_proj",
     "k_proj",
     "q_proj",
-    "v_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "o_proj",
+    "v_proj",
+    "gate_proj",
     "k_proj",
+    "down_proj",
     "q_proj",
+    "up_proj"
   ],
   "target_parameters": null,
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:03cc0484974038386f70d77c46f5aac3b533c7bced6962979281fe333c94d025
 size 664584480

 version https://git-lfs.github.com/spec/v1
+oid sha256:79def310c55b37cd8259a02e3faace2907e9317b665c755507979147e8030cfd
 size 664584480

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:875f3b2ab1fcb4f6e9f7c489f0befe3d16113bb96aa2b87fff811d3c44fc13d1
 size 1329377575

 version https://git-lfs.github.com/spec/v1
+oid sha256:428eb21aeeadb3f413515d892ed4fbbf367950def367306a9420106e1bd5f77a
 size 1329377575

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:326c0bca4395f2945822b3c5c26887d8851ac679204da1bb6c4d1f291262d7f1
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:9b9e176b8f2be84b6dc94b0764395d41a6cae49568e6336c00495dfadd4a8a56
 size 14645

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f2a49e35cdb54fb4cb47a212fa58478891c6773b4d4d385b309321fbef45523
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:6302d42699f02b238748e313b2253d63e288a72f6ded5bf4047590e79ba04256
 size 1465

trainer_state.json CHANGED Viewed

@@ -1,34 +1,97 @@
 {
-  "best_global_step": 20,
-  "best_metric": 0.10567178577184677,
-  "best_model_checkpoint": "/content/models/gemma_jigsaw_instruction/checkpoint-20",
-  "epoch": 0.39408866995073893,
   "eval_steps": 20,
-  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "entropy": 1.9691255465718627,
       "epoch": 0.39408866995073893,
-      "grad_norm": 1.3069469928741455,
       "learning_rate": 8.758169934640524e-06,
-      "loss": 0.1069,
-      "mean_token_accuracy": 0.9541643948955391,
-      "num_tokens": 626062.0,
       "step": 20
     },
     {
       "epoch": 0.39408866995073893,
-      "eval_entropy": 1.9961118835669298,
-      "eval_loss": 0.10567178577184677,
-      "eval_mean_token_accuracy": 0.9499198725590339,
-      "eval_num_tokens": 626062.0,
-      "eval_runtime": 4.3895,
-      "eval_samples_per_second": 46.247,
-      "eval_steps_per_second": 5.923,
       "step": 20
     }
   ],
   "logging_steps": 20,
@@ -48,7 +111,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1404085029875712.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_global_step": 80,
+  "best_metric": 0.10693139582872391,
+  "best_model_checkpoint": "/content/models/gemma_jigsaw_instruction/checkpoint-80",
+  "epoch": 1.5714285714285714,
   "eval_steps": 20,
+  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "entropy": 2.0030554473400115,
       "epoch": 0.39408866995073893,
+      "grad_norm": 2.5103867053985596,
       "learning_rate": 8.758169934640524e-06,
+      "loss": 0.3751,
+      "mean_token_accuracy": 0.859375,
+      "num_tokens": 72532.0,
       "step": 20
     },
     {
       "epoch": 0.39408866995073893,
+      "eval_entropy": 2.080273380646339,
+      "eval_loss": 0.14906036853790283,
+      "eval_mean_token_accuracy": 0.9230769230769231,
+      "eval_num_tokens": 72532.0,
+      "eval_runtime": 4.4741,
+      "eval_samples_per_second": 45.372,
+      "eval_steps_per_second": 5.811,
       "step": 20
+    },
+    {
+      "entropy": 2.026494912803173,
+      "epoch": 0.7881773399014779,
+      "grad_norm": 8.653420448303223,
+      "learning_rate": 7.450980392156863e-06,
+      "loss": 0.1611,
+      "mean_token_accuracy": 0.916015625,
+      "num_tokens": 145048.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.7881773399014779,
+      "eval_entropy": 2.0305226903695326,
+      "eval_loss": 0.16201180219650269,
+      "eval_mean_token_accuracy": 0.9254807692307693,
+      "eval_num_tokens": 145048.0,
+      "eval_runtime": 4.4358,
+      "eval_samples_per_second": 45.764,
+      "eval_steps_per_second": 5.861,
+      "step": 40
+    },
+    {
+      "entropy": 2.0197064559670945,
+      "epoch": 1.1773399014778325,
+      "grad_norm": 2.4835453033447266,
+      "learning_rate": 6.143790849673204e-06,
+      "loss": 0.1224,
+      "mean_token_accuracy": 0.9418512658227848,
+      "num_tokens": 216334.0,
+      "step": 60
+    },
+    {
+      "epoch": 1.1773399014778325,
+      "eval_entropy": 2.0450107271854696,
+      "eval_loss": 0.1152966096997261,
+      "eval_mean_token_accuracy": 0.9407051274409661,
+      "eval_num_tokens": 216334.0,
+      "eval_runtime": 4.4391,
+      "eval_samples_per_second": 45.73,
+      "eval_steps_per_second": 5.857,
+      "step": 60
+    },
+    {
+      "entropy": 1.9920476481318474,
+      "epoch": 1.5714285714285714,
+      "grad_norm": 5.939174175262451,
+      "learning_rate": 4.836601307189543e-06,
+      "loss": 0.0992,
+      "mean_token_accuracy": 0.95625,
+      "num_tokens": 289742.0,
+      "step": 80
+    },
+    {
+      "epoch": 1.5714285714285714,
+      "eval_entropy": 2.0168408017892103,
+      "eval_loss": 0.10693139582872391,
+      "eval_mean_token_accuracy": 0.9499198725590339,
+      "eval_num_tokens": 289742.0,
+      "eval_runtime": 4.4376,
+      "eval_samples_per_second": 45.746,
+      "eval_steps_per_second": 5.859,
+      "step": 80
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 5508744073159680.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c61e50d4f9446fcee0ac9cf1d49d4ab9df79d5ea238b486457c023a9ad8b210c
 size 6353

 version https://git-lfs.github.com/spec/v1
+oid sha256:c16b42437cdd8867c8a12eb5f0a4290982935695952cbda1ebb8366981cfa51f
 size 6353