JoshuaFreeman committed
Commit 1d3e1ce · verified · 1 Parent(s): 7433afe

Upload LoRA model and probe head for run Ministral-8B-Instruct-2410_layer_35

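For reference, a minimal sketch of loading the uploaded LoRA adapter onto the base model with transformers and peft. The base-model repo id and the local adapter path are assumptions taken from the run name and file layout, not documented in this commit, and loading the probe head itself is omitted because its file format is not shown here.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Base model inferred from the run name; dtype matches "torch.bfloat16" in training_config.json.
base_id = "mistralai/Ministral-8B-Instruct-2410"
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(base_id)

# Illustrative path: the adapter directory uploaded in this commit (assumed layout).
adapter_dir = "value_head_probes/Ministral-8B-Instruct-2410_layer_35"
model = PeftModel.from_pretrained(base, adapter_dir)
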
value_head_probes/Ministral-8B-Instruct-2410_layer_35/results.json CHANGED
@@ -1 +1 @@
- {"eval_metrics": {"eval_accuracy": 0.566583789159622, "eval_precision": 0.6229050279329609, "eval_recall": 0.35242986961675227, "eval_f1": 0.4501640171587181, "eval_auc": 0.6073324463810773, "eval_lm_loss": 8.255432085557418, "eval_probe_loss": 1.241396822441708, "eval_sparsity": 0.06913396661931819, "epoch": 4.0}, "train_metrics": {"train_accuracy": 0.983966355631489, "train_precision": 0.7694456045103362, "train_recall": 0.9430262316058862, "train_f1": 0.8474386247340884, "train_auc": 0.9960988008623874, "train_lm_loss": 8.15142185260088, "train_probe_loss": 0.1747750220151666, "train_sparsity": 0.08543802897135416, "epoch": 4.0}}
+ {"eval_metrics": {"eval_accuracy": 0.7489156626506024, "eval_precision": 0.7570593962999026, "eval_recall": 0.7411820781696854, "eval_f1": 0.7490366088631984, "eval_auc": 0.8341794107365884, "eval_lm_loss": 2.585011690313166, "eval_probe_loss": 0.5946418019858274, "eval_sparsity": 0.0897793856534091, "epoch": 2.0}, "train_metrics": {"train_accuracy": 0.9836468411957705, "train_precision": 0.8307563827248866, "train_recall": 0.9051605355518003, "train_f1": 0.8663639191290824, "train_auc": 0.9955749007106219, "train_lm_loss": 2.5909482983442453, "train_probe_loss": 0.28699275195980684, "train_sparsity": 0.07864575508313301, "epoch": 2.0}}
value_head_probes/Ministral-8B-Instruct-2410_layer_35/training_config.json CHANGED
@@ -7,10 +7,10 @@
  "dtype": "torch.bfloat16",
  "overwrite_output_dir": true,
  "max_steps": -1,
- "num_train_epochs": 4,
+ "num_train_epochs": 2,
  "per_device_train_batch_size": 2,
  "per_device_eval_batch_size": 2,
- "lambda_lm": 0.1,
+ "lambda_lm": 0.0,
  "logging_steps": 20,
  "eval_steps": 100,
  "overfit_mode": false,
@@ -42,11 +42,7 @@
  24,
  25,
  26,
- 27,
- 28,
- 29,
- 30,
- 31
+ 27
  ],
  "lora_r": 32,
  "lora_alpha": 64,