Upload LoRA model and probe head for run Qwen2.5-7B-Instruct_layer_27
Browse files
value_head_probes/Qwen2.5-7B-Instruct_layer_27/results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"eval_metrics": {"eval_accuracy": 0.
|
|
|
|
| 1 |
+
{"eval_metrics": {"eval_accuracy": 0.7532583178175342, "eval_precision": 0.7565023137067177, "eval_recall": 0.7569854702219384, "eval_f1": 0.7567438148443735, "eval_auc": 0.8320366675082659, "eval_lm_loss": 3.6352814110842617, "eval_probe_loss": 0.550237946618687, "eval_sparsity": 0.09692604758522727, "epoch": 2.0}, "train_metrics": {"train_accuracy": 0.9767835414476249, "train_precision": 0.7776664630421503, "train_recall": 0.8277458451455174, "train_f1": 0.8019250636228488, "train_auc": 0.9909954663706676, "train_lm_loss": 3.6980934619903563, "train_probe_loss": 0.42955863563678204, "train_sparsity": 0.08134397849058493, "epoch": 2.0}}
|
value_head_probes/Qwen2.5-7B-Instruct_layer_27/training_config.json
CHANGED
|
@@ -7,10 +7,10 @@
|
|
| 7 |
"dtype": "torch.bfloat16",
|
| 8 |
"overwrite_output_dir": true,
|
| 9 |
"max_steps": -1,
|
| 10 |
-
"num_train_epochs":
|
| 11 |
"per_device_train_batch_size": 2,
|
| 12 |
"per_device_eval_batch_size": 2,
|
| 13 |
-
"lambda_lm": 0.
|
| 14 |
"logging_steps": 20,
|
| 15 |
"eval_steps": 100,
|
| 16 |
"overfit_mode": false,
|
|
@@ -42,11 +42,7 @@
|
|
| 42 |
24,
|
| 43 |
25,
|
| 44 |
26,
|
| 45 |
-
27
|
| 46 |
-
28,
|
| 47 |
-
29,
|
| 48 |
-
30,
|
| 49 |
-
31
|
| 50 |
],
|
| 51 |
"lora_r": 32,
|
| 52 |
"lora_alpha": 64,
|
|
|
|
| 7 |
"dtype": "torch.bfloat16",
|
| 8 |
"overwrite_output_dir": true,
|
| 9 |
"max_steps": -1,
|
| 10 |
+
"num_train_epochs": 2,
|
| 11 |
"per_device_train_batch_size": 2,
|
| 12 |
"per_device_eval_batch_size": 2,
|
| 13 |
+
"lambda_lm": 0.0,
|
| 14 |
"logging_steps": 20,
|
| 15 |
"eval_steps": 100,
|
| 16 |
"overfit_mode": false,
|
|
|
|
| 42 |
24,
|
| 43 |
25,
|
| 44 |
26,
|
| 45 |
+
27
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
],
|
| 47 |
"lora_r": 32,
|
| 48 |
"lora_alpha": 64,
|