scae / config.json
jacobcd52's picture
Upload config.json with huggingface_hub
27a9de9 verified
{
"activation_dims": {
"mlp_0": 768,
"attn_0": 768,
"mlp_1": 768,
"attn_1": 768,
"mlp_2": 768,
"attn_2": 768,
"mlp_3": 768,
"attn_3": 768,
"mlp_4": 768,
"attn_4": 768,
"mlp_5": 768,
"attn_5": 768,
"mlp_6": 768,
"attn_6": 768,
"mlp_7": 768,
"attn_7": 768,
"mlp_8": 768,
"attn_8": 768,
"mlp_9": 768,
"attn_9": 768,
"mlp_10": 768,
"attn_10": 768,
"mlp_11": 768,
"attn_11": 768
},
"dict_sizes": {
"mlp_0": 12288,
"attn_0": 12288,
"mlp_1": 12288,
"attn_1": 12288,
"mlp_2": 12288,
"attn_2": 12288,
"mlp_3": 12288,
"attn_3": 12288,
"mlp_4": 12288,
"attn_4": 12288,
"mlp_5": 12288,
"attn_5": 12288,
"mlp_6": 12288,
"attn_6": 12288,
"mlp_7": 12288,
"attn_7": 12288,
"mlp_8": 12288,
"attn_8": 12288,
"mlp_9": 12288,
"attn_9": 12288,
"mlp_10": 12288,
"attn_10": 12288,
"mlp_11": 12288,
"attn_11": 12288
},
"ks": {
"mlp_0": 128,
"attn_0": 128,
"mlp_1": 128,
"attn_1": 128,
"mlp_2": 128,
"attn_2": 128,
"mlp_3": 128,
"attn_3": 128,
"mlp_4": 128,
"attn_4": 128,
"mlp_5": 128,
"attn_5": 128,
"mlp_6": 128,
"attn_6": 128,
"mlp_7": 128,
"attn_7": 128,
"mlp_8": 128,
"attn_8": 128,
"mlp_9": 128,
"attn_9": 128,
"mlp_10": 128,
"attn_10": 128,
"mlp_11": 128,
"attn_11": 128
},
"layers": [],
"lm_name": "",
"submodule_names": [
"mlp_0",
"attn_0",
"mlp_1",
"attn_1",
"mlp_2",
"attn_2",
"mlp_3",
"attn_3",
"mlp_4",
"attn_4",
"mlp_5",
"attn_5",
"mlp_6",
"attn_6",
"mlp_7",
"attn_7",
"mlp_8",
"attn_8",
"mlp_9",
"attn_9",
"mlp_10",
"attn_10",
"mlp_11",
"attn_11"
],
"connection_sparsity_coeff": 0.01,
"use_sparse_connections": false,
"dtype": "torch.float32",
"buffer_config": {
"ctx_len": 128,
"refresh_batch_size": 256,
"out_batch_size": 4096
}
}