JoshuaFreeman committed on
Commit
a4a7274
·
verified ·
1 Parent(s): 8f294fa

Upload LoRA model and probe head for run Meta-Llama-3.1-8B-Instruct_layer_16

Browse files
value_head_probes/Meta-Llama-3.1-8B-Instruct_layer_16/results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_metrics": {"eval_accuracy": 0.7370462456489308, "eval_precision": 0.7552787162162162, "eval_recall": 0.7066376926116159, "eval_f1": 0.7301490100020412, "eval_auc": 0.8124193111763027, "eval_lm_loss": 0.6732445657253265, "eval_probe_loss": 1.0031825680624356, "eval_sparsity": 0.0649380770596591, "epoch": 4.0}, "train_metrics": {"train_accuracy": 0.9948986605396211, "train_precision": 0.9192301909487295, "train_recall": 0.9778950735764556, "train_f1": 0.9476555839727195, "train_auc": 0.9993614757321699, "train_lm_loss": 0.45975126096835506, "train_probe_loss": 0.06347414483913244, "train_sparsity": 0.05946754064315404, "epoch": 4.0}}
value_head_probes/Meta-Llama-3.1-8B-Instruct_layer_16/training_config.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
3
+ "data_model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
4
+ "layer": 16,
5
+ "adapter_dir": "/workspace/hallucination_detection/value_head_probes/Meta-Llama-3.1-8B-Instruct_layer_16",
6
+ "train_split": 0.8,
7
+ "dtype": "torch.bfloat16",
8
+ "overwrite_output_dir": true,
9
+ "max_steps": -1,
10
+ "num_train_epochs": 4,
11
+ "per_device_train_batch_size": 2,
12
+ "per_device_eval_batch_size": 2,
13
+ "lambda_lm": 0.1,
14
+ "logging_steps": 20,
15
+ "eval_steps": 100,
16
+ "overfit_mode": false,
17
+ "lora_layers": [
18
+ 0,
19
+ 1,
20
+ 2,
21
+ 3,
22
+ 4,
23
+ 5,
24
+ 6,
25
+ 7,
26
+ 8,
27
+ 9,
28
+ 10,
29
+ 11,
30
+ 12,
31
+ 13,
32
+ 14,
33
+ 15,
34
+ 16,
35
+ 17,
36
+ 18,
37
+ 19,
38
+ 20,
39
+ 21,
40
+ 22,
41
+ 23,
42
+ 24,
43
+ 25,
44
+ 26,
45
+ 27,
46
+ 28,
47
+ 29,
48
+ 30,
49
+ 31
50
+ ],
51
+ "lora_r": 32,
52
+ "lora_alpha": 64,
53
+ "lora_dropout": 0.05,
54
+ "upload_to_hf": true,
55
+ "load_from_hf": false,
56
+ "probe_threshold": 0.5,
57
+ "hf_repo": "obalcells/labeled-entity-facts",
58
+ "dataset_name": "longfact",
59
+ "max_length": 1200,
60
+ "default_ignore": false,
61
+ "pos_weight": 10.0,
62
+ "neg_weight": 10.0,
63
+ "ignore_window_size": 5,
64
+ "shuffle": true,
65
+ "seed": 42,
66
+ "run_id": "Meta-Llama-3.1-8B-Instruct_layer_16"
67
+ }