Training in progress, epoch 1

Browse files

Files changed (3) hide show

adapter_config.json +51 -0
adapter_model.safetensors +3 -0
training_args.bin +3 -0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "auto_mapping": null,
+  "base_model_name_or_path": "EleutherAI/pythia-70m",
+  "candidate_reselection_proportion": 0.2,
+  "candidate_reselection_steps": null,
+  "density": 0.01,
+  "dtype": "float32",
+  "inference_mode": true,
+  "initial_reselection_rate": 0.2,
+  "modules_to_save": null,
+  "num_deltas": {
+    "gpt_neox.layers.0.attention.dense": 9781,
+    "gpt_neox.layers.0.attention.query_key_value": 29344,
+    "gpt_neox.layers.0.mlp.dense_4h_to_h": 39125,
+    "gpt_neox.layers.0.mlp.dense_h_to_4h": 39125,
+    "gpt_neox.layers.1.attention.dense": 9781,
+    "gpt_neox.layers.1.attention.query_key_value": 29344,
+    "gpt_neox.layers.1.mlp.dense_4h_to_h": 39125,
+    "gpt_neox.layers.1.mlp.dense_h_to_4h": 39125,
+    "gpt_neox.layers.2.attention.dense": 9781,
+    "gpt_neox.layers.2.attention.query_key_value": 29344,
+    "gpt_neox.layers.2.mlp.dense_4h_to_h": 39125,
+    "gpt_neox.layers.2.mlp.dense_h_to_4h": 39125,
+    "gpt_neox.layers.3.attention.dense": 9781,
+    "gpt_neox.layers.3.attention.query_key_value": 29344,
+    "gpt_neox.layers.3.mlp.dense_4h_to_h": 39125,
+    "gpt_neox.layers.3.mlp.dense_h_to_4h": 39125,
+    "gpt_neox.layers.4.attention.dense": 9781,
+    "gpt_neox.layers.4.attention.query_key_value": 29344,
+    "gpt_neox.layers.4.mlp.dense_4h_to_h": 39125,
+    "gpt_neox.layers.4.mlp.dense_h_to_4h": 39125,
+    "gpt_neox.layers.5.attention.dense": 9781,
+    "gpt_neox.layers.5.attention.query_key_value": 29344,
+    "gpt_neox.layers.5.mlp.dense_4h_to_h": 39125,
+    "gpt_neox.layers.5.mlp.dense_h_to_4h": 39125
+  },
+  "num_tunable_weights": null,
+  "peft_type": "SFT",
+  "reselection_rate_policy": "linear",
+  "reselection_steps": 20,
+  "revision": null,
+  "selection_accumulation_steps": 5,
+  "selection_algorithm": "rigl",
+  "target_modules": [
+    "query_key_value",
+    "dense",
+    "dense_h_to_4h",
+    "dense_4h_to_h"
+  ],
+  "task_type": "CAUSAL_LM"
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ebfd489e9b3bf2e8f39fbb0500620fcd8ebedce1571f788b4f1a97682a0a47e8
+size 5640680

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2cc1addda1218733c9dee0892baa2171358bade3f18bc49b318555fc96871ce4
+size 5368