{
  "titan_config": {
    "dim": 384,
    "depth": 6,
    "segment_len": 64,
    "num_persist_mem_tokens": 4,
    "num_longterm_mem_tokens": 8,
    "dim_head": 64,
    "heads": 6,
    "ff_mult": 4,
    "num_residual_streams": 4,
    "use_flex_attn": false,
    "sliding_window_attn": true
  },
  "neural_memory_config": {
    "depth": 2,
    "expansion_factor": 4.0,
    "neural_memory_layers": [
      2,
      4
    ],
    "neural_memory_segment_len": 16,
    "batch_size": 32
  },
  "vocab_size": 128256,
  "merged_from_parts": 5,
  "fusion_method": "weight_averaging"
}