irodkin committed on
Commit
5fd009c
·
verified ·
1 Parent(s): 47f4cfd

Training in progress, step 1000

Browse files
Files changed (3) hide show
  1. config.json +8 -11
  2. pytorch_model.bin +3 -0
  3. training_args.bin +2 -2
config.json CHANGED
@@ -7,11 +7,11 @@
7
  ],
8
  "attend_to_previous_input": false,
9
  "base_model_config": null,
10
- "base_model_name": "meta-llama/Llama-3.2-1B",
11
  "constant_depth": false,
12
  "correction": true,
13
  "d_mem": 64,
14
- "dtype": "bfloat16",
15
  "freeze_mem": false,
16
  "gating": false,
17
  "layers_attr": "model.layers",
@@ -21,15 +21,12 @@
21
  "noisy_halting": false,
22
  "num_mem_tokens": 32,
23
  "segment_alignment": "left",
24
- "segment_size": 1024,
25
- "sliding_window": true,
26
  "time_penalty": 0.0,
27
  "transformers_version": "4.57.1",
28
  "use_denom": true,
29
- "use_sink": true,
30
- "wrap_pos": false,
31
- "auto_map": {
32
- "AutoConfig": "modeling_armt.ARMTConfig",
33
- "AutoModelForCausalLM": "modeling_armt.InnerLoopARMTForCausalLM"
34
- }
35
- }
 
7
  ],
8
  "attend_to_previous_input": false,
9
  "base_model_config": null,
10
+ "base_model_name": "HuggingFaceTB/SmolLM2-360M",
11
  "constant_depth": false,
12
  "correction": true,
13
  "d_mem": 64,
14
+ "dtype": "float32",
15
  "freeze_mem": false,
16
  "gating": false,
17
  "layers_attr": "model.layers",
 
21
  "noisy_halting": false,
22
  "num_mem_tokens": 32,
23
  "segment_alignment": "left",
24
+ "segment_size": 256,
25
+ "sliding_window": false,
26
  "time_penalty": 0.0,
27
  "transformers_version": "4.57.1",
28
  "use_denom": true,
29
+ "use_sink": false,
30
+ "wrap_layers": null,
31
+ "wrap_pos": false
32
+ }
 
 
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fc53384a64c72342150a186244c88b3b4777c9920420e0559fbc7807f30c4fd
3
+ size 1581378582
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f611e06634e71a81b71d6dabce59e72a77202abdab67535b76daec6cb4fc1b4
3
- size 6904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1368ed84377993a4b44fe26324df7269812af0c8ec550abe9eadd6fe320805ca
3
+ size 5624