irodkin
/

run_20

irodkin committed on Dec 16, 2025

Commit

b5cc120

verified ·

1 Parent(s): d12b26e

Training in progress, step 1000

Files changed (3) hide show

config.json CHANGED Viewed

@@ -3,16 +3,15 @@
   "act_on": false,
   "act_type": "associative",
   "architectures": [
-    "MemoryParamsARMTForCausalLM"
   ],
   "attend_to_previous_input": false,
   "base_model_config": null,
-  "base_model_name": "HuggingFaceTB/SmolLM2-360M",
   "constant_depth": false,
   "correction": true,
   "d_mem": 64,
-  "dtype": "float32",
-  "freeze_base_model": true,
   "freeze_mem": false,
   "gating": false,
   "layers_attr": "model.layers",
@@ -22,12 +21,12 @@
   "noisy_halting": false,
   "num_mem_tokens": 32,
   "segment_alignment": "left",
-  "segment_size": 256,
-  "sliding_window": false,
   "time_penalty": 0.0,
-  "transformers_version": "4.57.1",
   "use_denom": true,
-  "use_sink": false,
   "wrap_layers": null,
   "wrap_pos": false
 }

   "act_on": false,
   "act_type": "associative",
   "architectures": [
+    "InnerLoopARMTForCausalLM"
   ],
   "attend_to_previous_input": false,
   "base_model_config": null,
+  "base_model_name": "meta-llama/Llama-3.2-1B",
   "constant_depth": false,
   "correction": true,
   "d_mem": 64,
+  "dtype": "bfloat16",
   "freeze_mem": false,
   "gating": false,
   "layers_attr": "model.layers",
   "noisy_halting": false,
   "num_mem_tokens": 32,
   "segment_alignment": "left",
+  "segment_size": 1024,
+  "sliding_window": true,
   "time_penalty": 0.0,
+  "transformers_version": "4.57.3",
   "use_denom": true,
+  "use_sink": true,
   "wrap_layers": null,
   "wrap_pos": false
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f229c2948883497ddf4250baf405d61aac0f836e0d46d98d3b835223dc508b7
-size 2840017622

 version https://git-lfs.github.com/spec/v1
+oid sha256:6795900029c02bf39a79ec4e6add1475b7d65592edc55f6ed515c16600e2ebf6
+size 2089174366

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:040d38999fb2ddf2bbda5b417a49f1b72b0e153e9b9a75d3c02bc96cc4aa9dd3
-size 5624

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d69749efec977196b030b4491ab2ab14c887415b735deff3f21eed4c29893e2
+size 6904