JoshuaFreeman committed on
Commit
10e0d6e
·
verified ·
1 Parent(s): 4b422ad

Upload LoRA model and probe head for run "first gemma 2B with last layer"

Browse files
value_head_probes/first gemma 2B with last layer/results.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval_metrics": {"eval_accuracy": 0.5481849825957236, "eval_precision": 0.6286564204263758, "eval_recall": 0.2504938759383643, "eval_f1": 0.35824268964543016, "eval_auc": 0.599054203344184, "eval_lm_loss": 8.58864703611894, "eval_probe_loss": 0.8011899159713225, "eval_sparsity": 0.17750355113636362, "epoch": 2.0}, "train_metrics": {"train_accuracy": 0.9460922712516787, "train_precision": 0.4116284641801773, "train_recall": 0.3297504798464491, "train_f1": 0.36616816454122414, "train_auc": 0.8816274559252752, "train_lm_loss": 8.537573195726443, "train_probe_loss": 0.8766092875064948, "train_sparsity": 0.1735576923076923, "epoch": 2.0}}
value_head_probes/first gemma 2B with last layer/training_config.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "google/gemma-2b",
3
+ "data_model_name": "meta-llama/Meta-Llama-3.1-8B-Instruct",
4
+ "layer": 17,
5
+ "adapter_dir": "/workspace/hallucination_detection/value_head_probes/first gemma 2B with last layer",
6
+ "train_split": 0.8,
7
+ "dtype": "torch.bfloat16",
8
+ "overwrite_output_dir": true,
9
+ "max_steps": -1,
10
+ "num_train_epochs": 2,
11
+ "per_device_train_batch_size": 2,
12
+ "per_device_eval_batch_size": 2,
13
+ "lambda_lm": 0.1,
14
+ "logging_steps": 20,
15
+ "eval_steps": 100,
16
+ "overfit_mode": false,
17
+ "lora_layers": [
18
+ 0,
19
+ 1,
20
+ 2,
21
+ 3,
22
+ 4,
23
+ 5,
24
+ 6,
25
+ 7,
26
+ 8,
27
+ 9,
28
+ 10,
29
+ 11,
30
+ 12,
31
+ 13,
32
+ 14,
33
+ 15,
34
+ 16,
35
+ 17,
36
+ 18,
37
+ 19,
38
+ 20,
39
+ 21,
40
+ 22,
41
+ 23,
42
+ 24,
43
+ 25,
44
+ 26,
45
+ 27,
46
+ 28,
47
+ 29,
48
+ 30,
49
+ 31
50
+ ],
51
+ "lora_r": 32,
52
+ "lora_alpha": 64,
53
+ "lora_dropout": 0.05,
54
+ "upload_to_hf": true,
55
+ "load_from_hf": false,
56
+ "probe_threshold": 0.5,
57
+ "hf_repo": "obalcells/labeled-entity-facts",
58
+ "dataset_name": "longfact",
59
+ "max_length": 1200,
60
+ "default_ignore": false,
61
+ "pos_weight": 10.0,
62
+ "neg_weight": 10.0,
63
+ "ignore_window_size": 5,
64
+ "shuffle": true,
65
+ "seed": 42,
66
+ "run_id": "first gemma 2B with last layer"
67
+ }