Upload LoRA model and probe head for run Meta-Llama-3.1-8B-Instruct_layer_16

Browse files

Files changed (2) hide show

value_head_probes/Meta-Llama-3.1-8B-Instruct_layer_16/results.json +1 -0
value_head_probes/Meta-Llama-3.1-8B-Instruct_layer_16/training_config.json +67 -0

value_head_probes/Meta-Llama-3.1-8B-Instruct_layer_16/results.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"eval_metrics": {"eval_accuracy": 0.7370462456489308, "eval_precision": 0.7552787162162162, "eval_recall": 0.7066376926116159, "eval_f1": 0.7301490100020412, "eval_auc": 0.8124193111763027, "eval_lm_loss": 0.6732445657253265, "eval_probe_loss": 1.0031825680624356, "eval_sparsity": 0.0649380770596591, "epoch": 4.0}, "train_metrics": {"train_accuracy": 0.9948986605396211, "train_precision": 0.9192301909487295, "train_recall": 0.9778950735764556, "train_f1": 0.9476555839727195, "train_auc": 0.9993614757321699, "train_lm_loss": 0.45975126096835506, "train_probe_loss": 0.06347414483913244, "train_sparsity": 0.05946754064315404, "epoch": 4.0}}

value_head_probes/Meta-Llama-3.1-8B-Instruct_layer_16/training_config.json ADDED Viewed

	@@ -0,0 +1,67 @@

+{
+    "model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "data_model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "layer": 16,
+    "adapter_dir": "/workspace/hallucination_detection/value_head_probes/Meta-Llama-3.1-8B-Instruct_layer_16",
+    "train_split": 0.8,
+    "dtype": "torch.bfloat16",
+    "overwrite_output_dir": true,
+    "max_steps": -1,
+    "num_train_epochs": 4,
+    "per_device_train_batch_size": 2,
+    "per_device_eval_batch_size": 2,
+    "lambda_lm": 0.1,
+    "logging_steps": 20,
+    "eval_steps": 100,
+    "overfit_mode": false,
+    "lora_layers": [
+        0,
+        1,
+        2,
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+        9,
+        10,
+        11,
+        12,
+        13,
+        14,
+        15,
+        16,
+        17,
+        18,
+        19,
+        20,
+        21,
+        22,
+        23,
+        24,
+        25,
+        26,
+        27,
+        28,
+        29,
+        30,
+        31
+    ],
+    "lora_r": 32,
+    "lora_alpha": 64,
+    "lora_dropout": 0.05,
+    "upload_to_hf": true,
+    "load_from_hf": false,
+    "probe_threshold": 0.5,
+    "hf_repo": "obalcells/labeled-entity-facts",
+    "dataset_name": "longfact",
+    "max_length": 1200,
+    "default_ignore": false,
+    "pos_weight": 10.0,
+    "neg_weight": 10.0,
+    "ignore_window_size": 5,
+    "shuffle": true,
+    "seed": 42,
+    "run_id": "Meta-Llama-3.1-8B-Instruct_layer_16"
+}