JoshuaFreeman committed on
Commit
e26a7fd
·
verified ·
1 Parent(s): 51c4faf

Upload LoRA model and probe head for run Qwen2.5-7B-Instruct_layer_14

Browse files
value_head_probes/Qwen2.5-7B-Instruct_layer_14/results.json CHANGED
@@ -1 +1 @@
1
- {"eval_metrics": {"eval_accuracy": 0.5720537046245648, "eval_precision": 0.6145487473588892, "eval_recall": 0.40221256420387197, "eval_f1": 0.4862089552238806, "eval_auc": 0.6171850389043279, "eval_lm_loss": 4.9619677977128465, "eval_probe_loss": 1.087889802024107, "eval_sparsity": 0.08887162642045454, "epoch": 4.0}, "train_metrics": {"train_accuracy": 0.9800719958065388, "train_precision": 0.7381755866069074, "train_recall": 0.8956813819577735, "train_f1": 0.8093366093366093, "train_auc": 0.9925497610010566, "train_lm_loss": 4.591152763366699, "train_probe_loss": 0.37327134192754063, "train_sparsity": 0.09552760980068109, "epoch": 4.0}}
 
1
+ {"eval_metrics": {"eval_accuracy": 0.7394964785881972, "eval_precision": 0.7521947987410966, "eval_recall": 0.7250518920645058, "eval_f1": 0.7383739837398374, "eval_auc": 0.812578985136204, "eval_lm_loss": 1.2907215974547646, "eval_probe_loss": 0.5717257234183225, "eval_sparsity": 0.09238170276988636, "epoch": 2.0}, "train_metrics": {"train_accuracy": 0.975789731715673, "train_precision": 0.7591904851447913, "train_recall": 0.8400478556009259, "train_f1": 0.7975750990826368, "train_auc": 0.9908065720994266, "train_lm_loss": 1.2737540799837845, "train_probe_loss": 0.42979353438489715, "train_sparsity": 0.08285968487079327, "epoch": 2.0}}
value_head_probes/Qwen2.5-7B-Instruct_layer_14/training_config.json CHANGED
@@ -7,10 +7,10 @@
7
  "dtype": "torch.bfloat16",
8
  "overwrite_output_dir": true,
9
  "max_steps": -1,
10
- "num_train_epochs": 4,
11
  "per_device_train_batch_size": 2,
12
  "per_device_eval_batch_size": 2,
13
- "lambda_lm": 0.1,
14
  "logging_steps": 20,
15
  "eval_steps": 100,
16
  "overfit_mode": false,
@@ -42,11 +42,7 @@
42
  24,
43
  25,
44
  26,
45
- 27,
46
- 28,
47
- 29,
48
- 30,
49
- 31
50
  ],
51
  "lora_r": 32,
52
  "lora_alpha": 64,
 
7
  "dtype": "torch.bfloat16",
8
  "overwrite_output_dir": true,
9
  "max_steps": -1,
10
+ "num_train_epochs": 2,
11
  "per_device_train_batch_size": 2,
12
  "per_device_eval_batch_size": 2,
13
+ "lambda_lm": 0.0,
14
  "logging_steps": 20,
15
  "eval_steps": 100,
16
  "overfit_mode": false,
 
42
  24,
43
  25,
44
  26,
45
+ 27
 
 
 
 
46
  ],
47
  "lora_r": 32,
48
  "lora_alpha": 64,