{
  "activation_dims": {
    "mlp_0": 768,
    "attn_0": 768,
    "mlp_1": 768,
    "attn_1": 768,
    "mlp_2": 768,
    "attn_2": 768,
    "mlp_3": 768,
    "attn_3": 768,
    "mlp_4": 768,
    "attn_4": 768,
    "mlp_5": 768,
    "attn_5": 768,
    "mlp_6": 768,
    "attn_6": 768,
    "mlp_7": 768,
    "attn_7": 768,
    "mlp_8": 768,
    "attn_8": 768,
    "mlp_9": 768,
    "attn_9": 768,
    "mlp_10": 768,
    "attn_10": 768,
    "mlp_11": 768,
    "attn_11": 768
  },
  "dict_sizes": {
    "mlp_0": 12288,
    "attn_0": 12288,
    "mlp_1": 12288,
    "attn_1": 12288,
    "mlp_2": 12288,
    "attn_2": 12288,
    "mlp_3": 12288,
    "attn_3": 12288,
    "mlp_4": 12288,
    "attn_4": 12288,
    "mlp_5": 12288,
    "attn_5": 12288,
    "mlp_6": 12288,
    "attn_6": 12288,
    "mlp_7": 12288,
    "attn_7": 12288,
    "mlp_8": 12288,
    "attn_8": 12288,
    "mlp_9": 12288,
    "attn_9": 12288,
    "mlp_10": 12288,
    "attn_10": 12288,
    "mlp_11": 12288,
    "attn_11": 12288
  },
  "ks": {
    "mlp_0": 128,
    "attn_0": 128,
    "mlp_1": 128,
    "attn_1": 128,
    "mlp_2": 128,
    "attn_2": 128,
    "mlp_3": 128,
    "attn_3": 128,
    "mlp_4": 128,
    "attn_4": 128,
    "mlp_5": 128,
    "attn_5": 128,
    "mlp_6": 128,
    "attn_6": 128,
    "mlp_7": 128,
    "attn_7": 128,
    "mlp_8": 128,
    "attn_8": 128,
    "mlp_9": 128,
    "attn_9": 128,
    "mlp_10": 128,
    "attn_10": 128,
    "mlp_11": 128,
    "attn_11": 128
  },
  "layers": [],
  "lm_name": "",
  "submodule_names": [
    "mlp_0",
    "attn_0",
    "mlp_1",
    "attn_1",
    "mlp_2",
    "attn_2",
    "mlp_3",
    "attn_3",
    "mlp_4",
    "attn_4",
    "mlp_5",
    "attn_5",
    "mlp_6",
    "attn_6",
    "mlp_7",
    "attn_7",
    "mlp_8",
    "attn_8",
    "mlp_9",
    "attn_9",
    "mlp_10",
    "attn_10",
    "mlp_11",
    "attn_11"
  ],
  "connection_sparsity_coeff": 0.01,
  "use_sparse_connections": false,
  "dtype": "torch.float32",
  "buffer_config": {
    "ctx_len": 128,
    "refresh_batch_size": 256,
    "out_batch_size": 4096
  }
}