smohammadi commited on
Commit
8df00b2
·
verified ·
1 Parent(s): f1b3d86

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. base_checkpoints/model.zarr/zarr.json +14 -0
  3. base_checkpoints/state.zarr/.adamw_mu[1]/zarr.json +42 -0
  4. base_checkpoints/state.zarr/.adamw_nu[1]/zarr.json +42 -0
  5. base_checkpoints/state.zarr/.mu[0].attn.c_k/zarr.json +42 -0
  6. base_checkpoints/state.zarr/.mu[0].attn.c_q/zarr.json +42 -0
  7. base_checkpoints/state.zarr/.mu[0].attn.c_v/zarr.json +42 -0
  8. base_checkpoints/state.zarr/.mu[0].mlp.c_proj/zarr.json +42 -0
  9. base_checkpoints/state.zarr/.mu[10].attn.c_k/zarr.json +42 -0
  10. base_checkpoints/state.zarr/.mu[10].attn.c_q/zarr.json +42 -0
  11. base_checkpoints/state.zarr/.mu[10].attn.c_v/zarr.json +42 -0
  12. base_checkpoints/state.zarr/.mu[11].attn.c_k/zarr.json +42 -0
  13. base_checkpoints/state.zarr/.mu[11].attn.c_q/zarr.json +42 -0
  14. base_checkpoints/state.zarr/.mu[11].attn.c_v/zarr.json +42 -0
  15. base_checkpoints/state.zarr/.mu[11].mlp.c_proj/zarr.json +42 -0
  16. base_checkpoints/state.zarr/.mu[12].attn.c_proj/zarr.json +42 -0
  17. base_checkpoints/state.zarr/.mu[12].mlp.c_fc/zarr.json +42 -0
  18. base_checkpoints/state.zarr/.mu[12].mlp.c_proj/zarr.json +42 -0
  19. base_checkpoints/state.zarr/.mu[13].attn.c_proj/zarr.json +42 -0
  20. base_checkpoints/state.zarr/.mu[13].mlp.c_fc/zarr.json +42 -0
  21. base_checkpoints/state.zarr/.mu[14].attn.c_proj/zarr.json +42 -0
  22. base_checkpoints/state.zarr/.mu[14].mlp.c_fc/zarr.json +42 -0
  23. base_checkpoints/state.zarr/.mu[14].mlp.c_proj/zarr.json +42 -0
  24. base_checkpoints/state.zarr/.mu[15].attn.c_proj/zarr.json +42 -0
  25. base_checkpoints/state.zarr/.mu[15].mlp.c_fc/zarr.json +42 -0
  26. base_checkpoints/state.zarr/.mu[16].attn.c_k/zarr.json +42 -0
  27. base_checkpoints/state.zarr/.mu[16].attn.c_q/zarr.json +42 -0
  28. base_checkpoints/state.zarr/.mu[16].attn.c_v/zarr.json +42 -0
  29. base_checkpoints/state.zarr/.mu[17].attn.c_k/zarr.json +42 -0
  30. base_checkpoints/state.zarr/.mu[17].attn.c_q/zarr.json +42 -0
  31. base_checkpoints/state.zarr/.mu[17].attn.c_v/zarr.json +42 -0
  32. base_checkpoints/state.zarr/.mu[17].mlp.c_proj/zarr.json +42 -0
  33. base_checkpoints/state.zarr/.mu[18].attn.c_proj/zarr.json +42 -0
  34. base_checkpoints/state.zarr/.mu[18].mlp.c_fc/zarr.json +42 -0
  35. base_checkpoints/state.zarr/.mu[18].mlp.c_proj/zarr.json +42 -0
  36. base_checkpoints/state.zarr/.mu[19].attn.c_proj/zarr.json +42 -0
  37. base_checkpoints/state.zarr/.mu[19].mlp.c_fc/zarr.json +42 -0
  38. base_checkpoints/state.zarr/.mu[1].attn.c_k/zarr.json +42 -0
  39. base_checkpoints/state.zarr/.mu[1].attn.c_q/zarr.json +42 -0
  40. base_checkpoints/state.zarr/.mu[1].attn.c_v/zarr.json +42 -0
  41. base_checkpoints/state.zarr/.mu[20].mlp.c_fc/zarr.json +42 -0
  42. base_checkpoints/state.zarr/.mu[20].mlp.c_proj/zarr.json +42 -0
  43. base_checkpoints/state.zarr/.mu[21].mlp.c_fc/zarr.json +42 -0
  44. base_checkpoints/state.zarr/.mu[22].attn.c_k/zarr.json +42 -0
  45. base_checkpoints/state.zarr/.mu[22].attn.c_proj/zarr.json +42 -0
  46. base_checkpoints/state.zarr/.mu[22].attn.c_q/zarr.json +42 -0
  47. base_checkpoints/state.zarr/.mu[22].attn.c_v/zarr.json +42 -0
  48. base_checkpoints/state.zarr/.mu[23].attn.c_k/zarr.json +42 -0
  49. base_checkpoints/state.zarr/.mu[23].attn.c_proj/zarr.json +42 -0
  50. base_checkpoints/state.zarr/.mu[23].attn.c_q/zarr.json +42 -0
.gitattributes CHANGED
@@ -56,3 +56,9 @@ sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/6/1 filter=lfs diff=lfs merge=
56
  sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/6/4 filter=lfs diff=lfs merge=lfs -text
57
  sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/6/3 filter=lfs diff=lfs merge=lfs -text
58
  sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/6/2 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
56
  sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/6/4 filter=lfs diff=lfs merge=lfs -text
57
  sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/6/3 filter=lfs diff=lfs merge=lfs -text
58
  sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/6/2 filter=lfs diff=lfs merge=lfs -text
59
+ sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/6/5 filter=lfs diff=lfs merge=lfs -text
60
+ sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/1/0 filter=lfs diff=lfs merge=lfs -text
61
+ sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/1/7 filter=lfs diff=lfs merge=lfs -text
62
+ sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/1/6 filter=lfs diff=lfs merge=lfs -text
63
+ sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/1/1 filter=lfs diff=lfs merge=lfs -text
64
+ sft_checkpoints/state.zarr/.mu\[16\].mlp.c_proj/c/1/4 filter=lfs diff=lfs merge=lfs -text
base_checkpoints/model.zarr/zarr.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attributes": {
3
+ "config": {
4
+ "sequence_len": 4096,
5
+ "vocab_size": 32768,
6
+ "n_layer": 24,
7
+ "n_head": 8,
8
+ "n_kv_head": 8,
9
+ "n_embed": 2048
10
+ }
11
+ },
12
+ "zarr_format": 3,
13
+ "node_type": "group"
14
+ }
base_checkpoints/state.zarr/.adamw_mu[1]/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 32768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 128,
12
+ 2048
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.adamw_nu[1]/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 32768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 128,
12
+ 2048
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[0].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[0].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[0].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[0].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 8192,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 1024,
12
+ 256
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[10].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[10].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[10].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[11].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[11].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[11].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[11].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 8192,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 1024,
12
+ 256
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[12].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[12].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 8192
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 1024
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[12].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 8192,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 1024,
12
+ 256
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[13].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[13].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 8192
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 1024
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[14].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[14].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 8192
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 1024
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[14].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 8192,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 1024,
12
+ 256
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[15].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[15].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 8192
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 1024
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[16].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[16].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[16].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[17].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[17].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[17].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[17].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 8192,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 1024,
12
+ 256
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[18].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[18].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 8192
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 1024
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[18].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 8192,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 1024,
12
+ 256
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[19].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[19].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 8192
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 1024
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[1].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[1].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[1].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[20].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 8192
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 1024
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[20].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 8192,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 1024,
12
+ 256
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[21].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 8192
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 1024
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[22].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[22].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[22].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[22].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[23].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[23].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/state.zarr/.mu[23].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 2048,
4
+ 2048
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 256,
12
+ 512
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }