JoshuaFreeman committed on
Commit
4b422ad
·
verified ·
1 Parent(s): f5ce83c

Upload LoRA model and probe head for run layer_31_21152420

Browse files
value_head_probes/layer_31_21152420/results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_metrics": {"eval_accuracy": 0.7589259075087021, "eval_precision": 0.7678716490658002, "eval_recall": 0.7469379691821414, "eval_f1": 0.7572601642299219, "eval_auc": 0.8348568873546631, "eval_lm_loss": 0.6357943068851124, "eval_probe_loss": 0.5860002804886211, "eval_sparsity": 0.07694313742897728, "epoch": 2.0}, "train_metrics": {"train_accuracy": 0.9852262067152782, "train_precision": 0.8141453141453141, "train_recall": 0.8904030710172745, "train_f1": 0.8505683901723505, "train_auc": 0.9954571003810048, "train_lm_loss": 0.6267710517614316, "train_probe_loss": 0.25741901919962124, "train_sparsity": 0.06836923452524038, "epoch": 2.0}}
value_head_probes/layer_31_21152420/training_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
3
+ "layer": 31,
4
+ "adapter_dir": "/root/hallucination_detection/value_head_probes/layer_31_21152420",
5
+ "train_split": 0.8,
6
+ "dtype": "torch.bfloat16",
7
+ "overwrite_output_dir": true,
8
+ "max_steps": -1,
9
+ "num_train_epochs": 2,
10
+ "per_device_train_batch_size": 2,
11
+ "per_device_eval_batch_size": 2,
12
+ "lambda_lm": 0.1,
13
+ "logging_steps": 20,
14
+ "eval_steps": 100,
15
+ "overfit_mode": false,
16
+ "lora_layers": [
17
+ 0,
18
+ 1,
19
+ 2,
20
+ 3,
21
+ 4,
22
+ 5,
23
+ 6,
24
+ 7,
25
+ 8,
26
+ 9,
27
+ 10,
28
+ 11,
29
+ 12,
30
+ 13,
31
+ 14,
32
+ 15,
33
+ 16,
34
+ 17,
35
+ 18,
36
+ 19,
37
+ 20,
38
+ 21,
39
+ 22,
40
+ 23,
41
+ 24,
42
+ 25,
43
+ 26,
44
+ 27,
45
+ 28,
46
+ 29,
47
+ 30,
48
+ 31
49
+ ],
50
+ "lora_r": 32,
51
+ "lora_alpha": 64,
52
+ "lora_dropout": 0.05,
53
+ "upload_to_hf": true,
54
+ "load_from_hf": false,
55
+ "probe_threshold": 0.5,
56
+ "hf_repo": "obalcells/labeled-entity-facts",
57
+ "dataset_name": "longfact",
58
+ "max_length": 1200,
59
+ "default_ignore": false,
60
+ "pos_weight": 10.0,
61
+ "neg_weight": 10.0,
62
+ "ignore_window_size": 5,
63
+ "shuffle": true,
64
+ "seed": 42
65
+ }