Training checkpoint at step 6000
config.json  ADDED  +35 -0
@@ -0,0 +1,35 @@
+{
+  "act_format": "linear",
+  "act_on": false,
+  "act_type": "associative",
+  "architectures": [
+    "InnerLoopARMTForCausalLM"
+  ],
+  "attend_to_previous_input": false,
+  "base_model_config": null,
+  "base_model_name": "meta-llama/Llama-3.2-1B",
+  "constant_depth": false,
+  "correction": true,
+  "d_mem": 64,
+  "dtype": "bfloat16",
+  "freeze_mem": false,
+  "gating": false,
+  "layers_attr": "model.layers",
+  "max_hop": 4,
+  "model_type": "armt",
+  "n_heads": 1,
+  "noisy_halting": false,
+  "num_mem_tokens": 32,
+  "segment_alignment": "left",
+  "segment_size": 1024,
+  "sliding_window": true,
+  "time_penalty": 0.0,
+  "transformers_version": "4.57.1",
+  "use_denom": true,
+  "use_sink": true,
+  "wrap_pos": false,
+  "auto_map": {
+    "AutoConfig": "modeling_armt.ARMTConfig",
+    "AutoModelForCausalLM": "modeling_armt.InnerLoopARMTForCausalLM"
+  }
+}
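Because `auto_map` routes `AutoConfig` and `AutoModelForCausalLM` to the repo's custom `modeling_armt` module, loading this checkpoint requires `trust_remote_code=True`. A minimal loading sketch follows; the checkpoint path is a placeholder (the real repo id is not shown here), and pairing the tokenizer with the `base_model_name` from config.json is an assumption.

```python
# Minimal sketch, not the official loading recipe.
# Assumptions: "path/to/checkpoint-6000" is a placeholder for the actual
# checkpoint directory or repo id; the tokenizer of the base model is reused.
import torch
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

ckpt = "path/to/checkpoint-6000"  # placeholder, replace with the real path/repo id

# trust_remote_code=True lets transformers import modeling_armt.ARMTConfig and
# modeling_armt.InnerLoopARMTForCausalLM as declared in auto_map.
config = AutoConfig.from_pretrained(ckpt, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    ckpt,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # matches "dtype": "bfloat16" in config.json
)

# Assumed: the tokenizer follows the base model named in config.json.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
```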