"""
Vortex-13B model configuration.
Optimized for 16GB VRAM (4060 Ti laptop) and MacBook Pro M3 Max.
"""
# Hyperparameter dictionary for the Vortex-13B hybrid SSM/attention model.
# Consumed by the model builder; keys should not be renamed, since callers
# retrieve this dict via get_config() and index into it directly.
VORTEX_13B_CONFIG = {
    # Model dimensions
    "d_model": 5120,   # hidden (embedding) width
    "num_layers": 40,  # total transformer/SSM blocks
    "num_heads": 40,   # attention heads per attention layer
    "head_dim": 128,   # d_model // num_heads (5120 // 40 == 128)
    # State-space layer parameters
    "d_state": 32,  # SSM state dimension (larger for bigger model)
    "d_conv": 4,    # SSM convolution width
    # Attention parameters
    "window_size": 512,           # Local attention window
    "use_flash_attention": True,  # NOTE(review): presumably gated on kernel availability at runtime — confirm in the builder
    # Feed-forward parameters
    "ffn_expansion": 4,  # FFN inner size = ffn_expansion * d_model
    # Vocabulary / sequence limits
    "num_domains": 7,  # must match len(domain_tags) below
    "vocab_size": 50000,
    "max_seq_len": 16384,
    # Layer ratio: 50% SSM, 50% attention (more memory for attention)
    "ssm_ratio": 0.5,
    # Data types
    "dtype": "bfloat16",
    # Special tokens (same as 7B)
    # IDs 0-3 are the standard control tokens; 4-11 delimit structured
    # content spans; 12-18 are the per-domain tags listed in domain_tags.
    "special_tokens": {
        "[PAD]": 0,
        "[UNK]": 1,
        "[BOS]": 2,
        "[EOS]": 3,
        "[EQUATION]": 4,
        "[/EQUATION]": 5,
        "[CITATION]": 6,
        "[/CITATION]": 7,
        "[MOLECULE]": 8,
        "[/MOLECULE]": 9,
        "[FIGURE]": 10,
        "[TABLE]": 11,
        "[MATH]": 12,
        "[CHEM]": 13,
        "[BIO]": 14,
        "[PHYS]": 15,
        "[EARTH]": 16,
        "[SPACE]": 17,
        "[ZOO]": 18,
    },
    # Domain tags (7 entries — keep in sync with num_domains above).
    "domain_tags": ["[MATH]", "[CHEM]", "[BIO]", "[PHYS]", "[EARTH]", "[SPACE]", "[ZOO]"],
    # Science module flags
    "enable_equation_module": True,
    "enable_numerical_module": True,
    "enable_citation_module": True,
    "enable_molecular_module": True,
}
def get_config():
    """Return the Vortex-13B hyperparameter dictionary.

    Note: this returns the module-level ``VORTEX_13B_CONFIG`` object
    itself (not a copy), so in-place mutations by one caller are
    visible to every other consumer.
    """
    config = VORTEX_13B_CONFIG
    return config