Upload LoRA model and probe head for run Qwen2.5-7B-Instruct_layer_27

Browse files

Files changed (2)

value_head_probes/Qwen2.5-7B-Instruct_layer_27/results.json +1 -1
value_head_probes/Qwen2.5-7B-Instruct_layer_27/training_config.json +3 -7

value_head_probes/Qwen2.5-7B-Instruct_layer_27/results.json CHANGED Viewed

@@ -1 +1 @@

- {"eval_metrics": {"eval_accuracy": 0.5802088513177523, "eval_precision": 0.6257100149476831, "eval_recall": 0.4134729355985776, "eval_f1": 0.49791840133222315, "eval_auc": 0.6244539075369286, "eval_lm_loss": 5.963947842337868, "eval_probe_loss": 0.9108255221085115, "eval_sparsity": 0.08431729403409091, "epoch": 4.0}, "train_metrics": {"train_accuracy": 0.9723739129252564, "train_precision": 0.6771163298743856, "train_recall": 0.7932181701855406, "train_f1": 0.7305833824395993, "train_auc": 0.9850187872878208, "train_lm_loss": 5.860074736521795, "train_probe_loss": 0.4101426233513615, "train_sparsity": 0.08815910143729969, "epoch": 4.0}}

+ {"eval_metrics": {"eval_accuracy": 0.7532583178175342, "eval_precision": 0.7565023137067177, "eval_recall": 0.7569854702219384, "eval_f1": 0.7567438148443735, "eval_auc": 0.8320366675082659, "eval_lm_loss": 3.6352814110842617, "eval_probe_loss": 0.550237946618687, "eval_sparsity": 0.09692604758522727, "epoch": 2.0}, "train_metrics": {"train_accuracy": 0.9767835414476249, "train_precision": 0.7776664630421503, "train_recall": 0.8277458451455174, "train_f1": 0.8019250636228488, "train_auc": 0.9909954663706676, "train_lm_loss": 3.6980934619903563, "train_probe_loss": 0.42955863563678204, "train_sparsity": 0.08134397849058493, "epoch": 2.0}}

value_head_probes/Qwen2.5-7B-Instruct_layer_27/training_config.json CHANGED Viewed

@@ -7,10 +7,10 @@
     "dtype": "torch.bfloat16",
     "overwrite_output_dir": true,
     "max_steps": -1,
-    "num_train_epochs": 4,
     "per_device_train_batch_size": 2,
     "per_device_eval_batch_size": 2,
-    "lambda_lm": 0.1,
     "logging_steps": 20,
     "eval_steps": 100,
     "overfit_mode": false,
@@ -42,11 +42,7 @@
         24,
         25,
         26,
-        27,
-        28,
-        29,
-        30,
-        31
     ],
     "lora_r": 32,
     "lora_alpha": 64,

     "dtype": "torch.bfloat16",
     "overwrite_output_dir": true,
     "max_steps": -1,
+    "num_train_epochs": 2,
     "per_device_train_batch_size": 2,
     "per_device_eval_batch_size": 2,
+    "lambda_lm": 0.0,
     "logging_steps": 20,
     "eval_steps": 100,
     "overfit_mode": false,
         24,
         25,
         26,
+        27
     ],
     "lora_r": 32,
     "lora_alpha": 64,