{
    "activation_dims": {
        "mlp_0": 768,
        "attn_0": 768,
        "mlp_1": 768,
        "attn_1": 768,
        "mlp_2": 768,
        "attn_2": 768,
        "mlp_3": 768,
        "attn_3": 768,
        "mlp_4": 768,
        "attn_4": 768,
        "mlp_5": 768,
        "attn_5": 768,
        "mlp_6": 768,
        "attn_6": 768,
        "mlp_7": 768,
        "attn_7": 768,
        "mlp_8": 768,
        "attn_8": 768,
        "mlp_9": 768,
        "attn_9": 768,
        "mlp_10": 768,
        "attn_10": 768,
        "mlp_11": 768,
        "attn_11": 768
    },
    "dict_sizes": {
        "mlp_0": 12288,
        "attn_0": 12288,
        "mlp_1": 12288,
        "attn_1": 12288,
        "mlp_2": 12288,
        "attn_2": 12288,
        "mlp_3": 12288,
        "attn_3": 12288,
        "mlp_4": 12288,
        "attn_4": 12288,
        "mlp_5": 12288,
        "attn_5": 12288,
        "mlp_6": 12288,
        "attn_6": 12288,
        "mlp_7": 12288,
        "attn_7": 12288,
        "mlp_8": 12288,
        "attn_8": 12288,
        "mlp_9": 12288,
        "attn_9": 12288,
        "mlp_10": 12288,
        "attn_10": 12288,
        "mlp_11": 12288,
        "attn_11": 12288
    },
    "ks": {
        "mlp_0": 128,
        "attn_0": 128,
        "mlp_1": 128,
        "attn_1": 128,
        "mlp_2": 128,
        "attn_2": 128,
        "mlp_3": 128,
        "attn_3": 128,
        "mlp_4": 128,
        "attn_4": 128,
        "mlp_5": 128,
        "attn_5": 128,
        "mlp_6": 128,
        "attn_6": 128,
        "mlp_7": 128,
        "attn_7": 128,
        "mlp_8": 128,
        "attn_8": 128,
        "mlp_9": 128,
        "attn_9": 128,
        "mlp_10": 128,
        "attn_10": 128,
        "mlp_11": 128,
        "attn_11": 128
    },
    "layers": [],
    "lm_name": "",
    "submodule_names": [
        "mlp_0",
        "attn_0",
        "mlp_1",
        "attn_1",
        "mlp_2",
        "attn_2",
        "mlp_3",
        "attn_3",
        "mlp_4",
        "attn_4",
        "mlp_5",
        "attn_5",
        "mlp_6",
        "attn_6",
        "mlp_7",
        "attn_7",
        "mlp_8",
        "attn_8",
        "mlp_9",
        "attn_9",
        "mlp_10",
        "attn_10",
        "mlp_11",
        "attn_11"
    ],
    "connection_sparsity_coeff": 0.01,
    "use_sparse_connections": false,
    "dtype": "torch.float32",
    "buffer_config": {
        "ctx_len": 128,
        "refresh_batch_size": 256,
        "out_batch_size": 4096
    }
}