smohammadi committed on
Commit
56d9e71
·
verified ·
1 Parent(s): 48d28dd

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. base_checkpoints/model.zarr/.h[0].attn.c_proj/zarr.json +42 -0
  3. base_checkpoints/model.zarr/.h[10].mlp.c_proj/zarr.json +42 -0
  4. base_checkpoints/model.zarr/.h[1].mlp.c_fc/zarr.json +42 -0
  5. base_checkpoints/model.zarr/.h[3].mlp.c_proj/zarr.json +42 -0
  6. base_checkpoints/model.zarr/.h[5].attn.c_q/zarr.json +42 -0
  7. base_checkpoints/model.zarr/.h[5].attn.c_v/zarr.json +42 -0
  8. base_checkpoints/model.zarr/.h[6].mlp.c_proj/zarr.json +42 -0
  9. base_checkpoints/model.zarr/.h[7].attn.c_proj/zarr.json +42 -0
  10. base_checkpoints/model.zarr/.h[8].attn.c_k/zarr.json +42 -0
  11. base_checkpoints/model.zarr/.h[8].attn.c_q/zarr.json +42 -0
  12. base_checkpoints/model.zarr/.h[8].attn.c_v/zarr.json +42 -0
  13. base_checkpoints/model.zarr/.h[9].mlp.c_proj/zarr.json +42 -0
  14. dpo_checkpoints/model.zarr/zarr.json +14 -0
  15. dpo_checkpoints/state.zarr/.adamw_mu[1]/zarr.json +42 -0
  16. dpo_checkpoints/state.zarr/.adamw_nu[1]/zarr.json +42 -0
  17. dpo_checkpoints/state.zarr/.mu[0].attn.c_k/zarr.json +42 -0
  18. dpo_checkpoints/state.zarr/.mu[0].attn.c_proj/zarr.json +42 -0
  19. dpo_checkpoints/state.zarr/.mu[0].attn.c_q/zarr.json +42 -0
  20. dpo_checkpoints/state.zarr/.mu[0].attn.c_v/zarr.json +42 -0
  21. dpo_checkpoints/state.zarr/.mu[0].mlp.c_fc/zarr.json +42 -0
  22. dpo_checkpoints/state.zarr/.mu[0].mlp.c_proj/zarr.json +42 -0
  23. dpo_checkpoints/state.zarr/.mu[10].attn.c_k/zarr.json +42 -0
  24. dpo_checkpoints/state.zarr/.mu[10].attn.c_q/zarr.json +42 -0
  25. dpo_checkpoints/state.zarr/.mu[10].attn.c_v/zarr.json +42 -0
  26. dpo_checkpoints/state.zarr/.mu[10].mlp.c_proj/zarr.json +42 -0
  27. dpo_checkpoints/state.zarr/.mu[11].attn.c_k/zarr.json +42 -0
  28. dpo_checkpoints/state.zarr/.mu[11].attn.c_q/zarr.json +42 -0
  29. dpo_checkpoints/state.zarr/.mu[11].attn.c_v/zarr.json +42 -0
  30. dpo_checkpoints/state.zarr/.mu[11].mlp.c_fc/zarr.json +42 -0
  31. dpo_checkpoints/state.zarr/.mu[11].mlp.c_proj/zarr.json +42 -0
  32. dpo_checkpoints/state.zarr/.mu[1].attn.c_k/zarr.json +42 -0
  33. dpo_checkpoints/state.zarr/.mu[1].attn.c_q/zarr.json +42 -0
  34. dpo_checkpoints/state.zarr/.mu[1].attn.c_v/zarr.json +42 -0
  35. dpo_checkpoints/state.zarr/.mu[1].mlp.c_proj/zarr.json +42 -0
  36. dpo_checkpoints/state.zarr/.mu[2].attn.c_k/zarr.json +42 -0
  37. dpo_checkpoints/state.zarr/.mu[2].attn.c_proj/zarr.json +42 -0
  38. dpo_checkpoints/state.zarr/.mu[2].attn.c_q/zarr.json +42 -0
  39. dpo_checkpoints/state.zarr/.mu[2].attn.c_v/zarr.json +42 -0
  40. dpo_checkpoints/state.zarr/.mu[2].mlp.c_fc/zarr.json +42 -0
  41. dpo_checkpoints/state.zarr/.mu[3].attn.c_proj/zarr.json +42 -0
  42. dpo_checkpoints/state.zarr/.mu[3].mlp.c_fc/zarr.json +42 -0
  43. dpo_checkpoints/state.zarr/.mu[3].mlp.c_proj/zarr.json +42 -0
  44. dpo_checkpoints/state.zarr/.mu[4].attn.c_k/zarr.json +42 -0
  45. dpo_checkpoints/state.zarr/.mu[4].attn.c_proj/zarr.json +42 -0
  46. dpo_checkpoints/state.zarr/.mu[4].attn.c_q/zarr.json +42 -0
  47. dpo_checkpoints/state.zarr/.mu[4].attn.c_v/zarr.json +42 -0
  48. dpo_checkpoints/state.zarr/.mu[4].mlp.c_fc/zarr.json +42 -0
  49. dpo_checkpoints/state.zarr/.mu[4].mlp.c_proj/zarr.json +42 -0
  50. dpo_checkpoints/state.zarr/.mu[5].attn.c_proj/zarr.json +42 -0
.gitattributes CHANGED
@@ -44,3 +44,7 @@ sft_checkpoints/state.zarr/.mu\[2\].mlp.c_proj/c/1/2 filter=lfs diff=lfs merge=l
44
  sft_checkpoints/state.zarr/.mu\[2\].mlp.c_proj/c/3/0 filter=lfs diff=lfs merge=lfs -text
45
  sft_checkpoints/state.zarr/.mu\[2\].mlp.c_proj/c/3/1 filter=lfs diff=lfs merge=lfs -text
46
  sft_checkpoints/state.zarr/.mu\[2\].mlp.c_proj/c/3/3 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
44
  sft_checkpoints/state.zarr/.mu\[2\].mlp.c_proj/c/3/0 filter=lfs diff=lfs merge=lfs -text
45
  sft_checkpoints/state.zarr/.mu\[2\].mlp.c_proj/c/3/1 filter=lfs diff=lfs merge=lfs -text
46
  sft_checkpoints/state.zarr/.mu\[2\].mlp.c_proj/c/3/3 filter=lfs diff=lfs merge=lfs -text
47
+ sft_checkpoints/state.zarr/.mu\[2\].mlp.c_proj/c/3/2 filter=lfs diff=lfs merge=lfs -text
48
+ sft_checkpoints/state.zarr/.mu\[2\].mlp.c_proj/c/2/0 filter=lfs diff=lfs merge=lfs -text
49
+ sft_checkpoints/state.zarr/.mu\[2\].mlp.c_proj/c/2/1 filter=lfs diff=lfs merge=lfs -text
50
+ sft_checkpoints/state.zarr/.mu\[2\].mlp.c_proj/c/2/3 filter=lfs diff=lfs merge=lfs -text
base_checkpoints/model.zarr/.h[0].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/model.zarr/.h[10].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 3072,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 768,
12
+ 192
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/model.zarr/.h[1].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 3072
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 768
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/model.zarr/.h[3].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 3072,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 768,
12
+ 192
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/model.zarr/.h[5].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/model.zarr/.h[5].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/model.zarr/.h[6].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 3072,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 768,
12
+ 192
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/model.zarr/.h[7].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/model.zarr/.h[8].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/model.zarr/.h[8].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/model.zarr/.h[8].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
base_checkpoints/model.zarr/.h[9].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 3072,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 768,
12
+ 192
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/model.zarr/zarr.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attributes": {
3
+ "config": {
4
+ "sequence_len": 1024,
5
+ "vocab_size": 32768,
6
+ "n_layer": 12,
7
+ "n_head": 3,
8
+ "n_kv_head": 3,
9
+ "n_embed": 768
10
+ }
11
+ },
12
+ "zarr_format": 3,
13
+ "node_type": "group"
14
+ }
dpo_checkpoints/state.zarr/.adamw_mu[1]/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 32768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 48,
12
+ 4096
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.adamw_nu[1]/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 32768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 48,
12
+ 4096
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[0].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[0].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[0].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[0].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[0].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 3072
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 768
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[0].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 3072,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 768,
12
+ 192
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[10].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[10].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[10].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[10].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 3072,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 768,
12
+ 192
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[11].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[11].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[11].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[11].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 3072
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 768
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[11].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 3072,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 768,
12
+ 192
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[1].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[1].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[1].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[1].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 3072,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 768,
12
+ 192
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[2].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[2].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[2].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[2].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[2].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 3072
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 768
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[3].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[3].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 3072
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 768
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[3].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 3072,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 768,
12
+ 192
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[4].attn.c_k/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[4].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[4].attn.c_q/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[4].attn.c_v/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[4].mlp.c_fc/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 3072
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 768
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[4].mlp.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 3072,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 768,
12
+ 192
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }
dpo_checkpoints/state.zarr/.mu[5].attn.c_proj/zarr.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "shape": [
3
+ 768,
4
+ 768
5
+ ],
6
+ "data_type": "float32",
7
+ "chunk_grid": {
8
+ "name": "regular",
9
+ "configuration": {
10
+ "chunk_shape": [
11
+ 192,
12
+ 384
13
+ ]
14
+ }
15
+ },
16
+ "chunk_key_encoding": {
17
+ "name": "default",
18
+ "configuration": {
19
+ "separator": "/"
20
+ }
21
+ },
22
+ "fill_value": 0.0,
23
+ "codecs": [
24
+ {
25
+ "name": "bytes",
26
+ "configuration": {
27
+ "endian": "little"
28
+ }
29
+ },
30
+ {
31
+ "name": "zstd",
32
+ "configuration": {
33
+ "level": 0,
34
+ "checksum": false
35
+ }
36
+ }
37
+ ],
38
+ "attributes": {},
39
+ "zarr_format": 3,
40
+ "node_type": "array",
41
+ "storage_transformers": []
42
+ }