jacobcd52 committed on
Commit
27a9de9
·
verified ·
1 Parent(s): 820aecd

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +49 -48
config.json CHANGED
@@ -26,56 +26,56 @@
26
  "attn_11": 768
27
  },
28
  "dict_sizes": {
29
- "mlp_0": 6144,
30
- "attn_0": 6144,
31
- "mlp_1": 6144,
32
- "attn_1": 6144,
33
- "mlp_2": 6144,
34
- "attn_2": 6144,
35
- "mlp_3": 6144,
36
- "attn_3": 6144,
37
- "mlp_4": 6144,
38
- "attn_4": 6144,
39
- "mlp_5": 6144,
40
- "attn_5": 6144,
41
- "mlp_6": 6144,
42
- "attn_6": 6144,
43
- "mlp_7": 6144,
44
- "attn_7": 6144,
45
- "mlp_8": 6144,
46
- "attn_8": 6144,
47
- "mlp_9": 6144,
48
- "attn_9": 6144,
49
- "mlp_10": 6144,
50
- "attn_10": 6144,
51
- "mlp_11": 6144,
52
- "attn_11": 6144
53
  },
54
  "ks": {
55
- "mlp_0": 64,
56
- "attn_0": 64,
57
- "mlp_1": 64,
58
- "attn_1": 64,
59
- "mlp_2": 64,
60
- "attn_2": 64,
61
- "mlp_3": 64,
62
- "attn_3": 64,
63
- "mlp_4": 64,
64
- "attn_4": 64,
65
- "mlp_5": 64,
66
- "attn_5": 64,
67
- "mlp_6": 64,
68
- "attn_6": 64,
69
- "mlp_7": 64,
70
- "attn_7": 64,
71
- "mlp_8": 64,
72
- "attn_8": 64,
73
- "mlp_9": 64,
74
- "attn_9": 64,
75
- "mlp_10": 64,
76
- "attn_10": 64,
77
- "mlp_11": 64,
78
- "attn_11": 64
79
  },
80
  "layers": [],
81
  "lm_name": "",
@@ -107,6 +107,7 @@
107
  ],
108
  "connection_sparsity_coeff": 0.01,
109
  "use_sparse_connections": false,
 
110
  "buffer_config": {
111
  "ctx_len": 128,
112
  "refresh_batch_size": 256,
 
26
  "attn_11": 768
27
  },
28
  "dict_sizes": {
29
+ "mlp_0": 12288,
30
+ "attn_0": 12288,
31
+ "mlp_1": 12288,
32
+ "attn_1": 12288,
33
+ "mlp_2": 12288,
34
+ "attn_2": 12288,
35
+ "mlp_3": 12288,
36
+ "attn_3": 12288,
37
+ "mlp_4": 12288,
38
+ "attn_4": 12288,
39
+ "mlp_5": 12288,
40
+ "attn_5": 12288,
41
+ "mlp_6": 12288,
42
+ "attn_6": 12288,
43
+ "mlp_7": 12288,
44
+ "attn_7": 12288,
45
+ "mlp_8": 12288,
46
+ "attn_8": 12288,
47
+ "mlp_9": 12288,
48
+ "attn_9": 12288,
49
+ "mlp_10": 12288,
50
+ "attn_10": 12288,
51
+ "mlp_11": 12288,
52
+ "attn_11": 12288
53
  },
54
  "ks": {
55
+ "mlp_0": 128,
56
+ "attn_0": 128,
57
+ "mlp_1": 128,
58
+ "attn_1": 128,
59
+ "mlp_2": 128,
60
+ "attn_2": 128,
61
+ "mlp_3": 128,
62
+ "attn_3": 128,
63
+ "mlp_4": 128,
64
+ "attn_4": 128,
65
+ "mlp_5": 128,
66
+ "attn_5": 128,
67
+ "mlp_6": 128,
68
+ "attn_6": 128,
69
+ "mlp_7": 128,
70
+ "attn_7": 128,
71
+ "mlp_8": 128,
72
+ "attn_8": 128,
73
+ "mlp_9": 128,
74
+ "attn_9": 128,
75
+ "mlp_10": 128,
76
+ "attn_10": 128,
77
+ "mlp_11": 128,
78
+ "attn_11": 128
79
  },
80
  "layers": [],
81
  "lm_name": "",
 
107
  ],
108
  "connection_sparsity_coeff": 0.01,
109
  "use_sparse_connections": false,
110
+ "dtype": "torch.float32",
111
  "buffer_config": {
112
  "ctx_len": 128,
113
  "refresh_batch_size": 256,