File size: 1,738 Bytes
5c43f61
0f8cbc9
 
5c43f61
 
0f8cbc9
5c43f61
0f8cbc9
 
 
5c43f61
 
 
0f8cbc9
5c43f61
 
 
 
0f8cbc9
5c43f61
 
0f8cbc9
 
5c43f61
 
 
0f8cbc9
 
5c43f61
 
 
 
0f8cbc9
5c43f61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f8cbc9
5c43f61
 
 
 
 
 
 
 
0f8cbc9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""

Vortex-13B model configuration.

Optimized for 16 GB of VRAM (4060 Ti laptop) and for the MacBook Pro M3 Max.

"""

# Hyperparameters and feature switches for the Vortex-13B model, kept in a
# single plain dictionary so they can be passed around as one object.
VORTEX_13B_CONFIG = {
    # ---- Core model shape ----
    "d_model": 5120,
    "num_layers": 40,
    "num_heads": 40,
    "head_dim": 128,  # equals d_model // num_heads (5120 // 40)

    # ---- State-space (SSM) layers ----
    "d_state": 32,  # SSM state dimension (larger for bigger model)
    "d_conv": 4,  # SSM convolution width

    # ---- Attention ----
    "window_size": 512,  # local attention window
    "use_flash_attention": True,

    # ---- Feed-forward, vocabulary and context length ----
    "ffn_expansion": 4,
    "num_domains": 7,  # one per entry in "domain_tags" below
    "vocab_size": 50_000,
    "max_seq_len": 16_384,

    # ---- Layer mix: half SSM, half attention (more memory for attention) ----
    "ssm_ratio": 0.5,

    # ---- Numeric precision ----
    "dtype": "bfloat16",

    # ---- Special tokens (same as 7B) ----
    # Ids are assigned consecutively from 0 in the order listed, so the
    # position of a token in this tuple *is* its id; append new tokens at
    # the end to keep existing ids stable.
    "special_tokens": {
        token: token_id
        for token_id, token in enumerate(
            (
                "[PAD]",
                "[UNK]",
                "[BOS]",
                "[EOS]",
                "[EQUATION]",
                "[/EQUATION]",
                "[CITATION]",
                "[/CITATION]",
                "[MOLECULE]",
                "[/MOLECULE]",
                "[FIGURE]",
                "[TABLE]",
                "[MATH]",
                "[CHEM]",
                "[BIO]",
                "[PHYS]",
                "[EARTH]",
                "[SPACE]",
                "[ZOO]",
            )
        )
    },

    # Subset of the special tokens above that act as domain markers.
    "domain_tags": [
        "[MATH]",
        "[CHEM]",
        "[BIO]",
        "[PHYS]",
        "[EARTH]",
        "[SPACE]",
        "[ZOO]",
    ],

    # ---- Science module switches ----
    "enable_equation_module": True,
    "enable_numerical_module": True,
    "enable_citation_module": True,
    "enable_molecular_module": True,
}


def get_config():
    """Give callers access to the Vortex-13B settings.

    Returns:
        The module-level ``VORTEX_13B_CONFIG`` dictionary itself (not a
        copy), so in-place changes made through the returned object are
        visible to every other user of the configuration.
    """
    config = VORTEX_13B_CONFIG
    return config