irodkin committed on
Commit
b5cc120
·
verified ·
1 Parent(s): d12b26e

Training in progress, step 1000

Browse files
Files changed (3) hide show
  1. config.json +7 -8
  2. pytorch_model.bin +2 -2
  3. training_args.bin +2 -2
config.json CHANGED
@@ -3,16 +3,15 @@
3
  "act_on": false,
4
  "act_type": "associative",
5
  "architectures": [
6
- "MemoryParamsARMTForCausalLM"
7
  ],
8
  "attend_to_previous_input": false,
9
  "base_model_config": null,
10
- "base_model_name": "HuggingFaceTB/SmolLM2-360M",
11
  "constant_depth": false,
12
  "correction": true,
13
  "d_mem": 64,
14
- "dtype": "float32",
15
- "freeze_base_model": true,
16
  "freeze_mem": false,
17
  "gating": false,
18
  "layers_attr": "model.layers",
@@ -22,12 +21,12 @@
22
  "noisy_halting": false,
23
  "num_mem_tokens": 32,
24
  "segment_alignment": "left",
25
- "segment_size": 256,
26
- "sliding_window": false,
27
  "time_penalty": 0.0,
28
- "transformers_version": "4.57.1",
29
  "use_denom": true,
30
- "use_sink": false,
31
  "wrap_layers": null,
32
  "wrap_pos": false
33
  }
 
3
  "act_on": false,
4
  "act_type": "associative",
5
  "architectures": [
6
+ "InnerLoopARMTForCausalLM"
7
  ],
8
  "attend_to_previous_input": false,
9
  "base_model_config": null,
10
+ "base_model_name": "meta-llama/Llama-3.2-1B",
11
  "constant_depth": false,
12
  "correction": true,
13
  "d_mem": 64,
14
+ "dtype": "bfloat16",
 
15
  "freeze_mem": false,
16
  "gating": false,
17
  "layers_attr": "model.layers",
 
21
  "noisy_halting": false,
22
  "num_mem_tokens": 32,
23
  "segment_alignment": "left",
24
+ "segment_size": 1024,
25
+ "sliding_window": true,
26
  "time_penalty": 0.0,
27
+ "transformers_version": "4.57.3",
28
  "use_denom": true,
29
+ "use_sink": true,
30
  "wrap_layers": null,
31
  "wrap_pos": false
32
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f229c2948883497ddf4250baf405d61aac0f836e0d46d98d3b835223dc508b7
3
- size 2840017622
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6795900029c02bf39a79ec4e6add1475b7d65592edc55f6ed515c16600e2ebf6
3
+ size 2089174366
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:040d38999fb2ddf2bbda5b417a49f1b72b0e153e9b9a75d3c02bc96cc4aa9dd3
3
- size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d69749efec977196b030b4491ab2ab14c887415b735deff3f21eed4c29893e2
3
+ size 6904