ARBS / arbitor /config.py
CLIWorks's picture
Upload folder using huggingface_hub
07c6ab1 verified
# Token-ID space.
# VOCAB and AUDIO_VOCAB are separate names that currently hold the same value.
VOCAB: int = 288
AUDIO_VOCAB: int = 288
# Lowest ID treated as a special token (per the name); the SPECIAL_VOCAB table
# in this file starts at the same value.
SPECIAL_TOKEN_MIN: int = 256

# Audio front-end.
# NOTE(review): units are not stated here -- presumably AUDIO_SR is in Hz and
# AUDIO_FRAME_RATE in frames per second; confirm against the audio pipeline.
AUDIO_SR: int = 16_000
AUDIO_FRAME_RATE: int = 50

# Generic threshold; what it gates is not visible in this file.
THRESHOLD: float = 0.05
# Core dimensions
HIDDEN_DIM: int = 5_600
EMBEDDING_DIM: int = 1_536
# Numerically 2 * HIDDEN_DIM today, but set as an independent literal.
FFN_HIDDEN: int = 11_200
CTX: int = 256

# Backward-compatibility alias: older code that still refers to TRIGRAM_DIM
# gets the current HIDDEN_DIM value.
TRIGRAM_DIM = HIDDEN_DIM
# VQ (presumably vector-quantization codebooks -- confirm with the quantizer)
CODEBOOK_DIM: int = 512

# Codebook sizes are powers of two. The un-suffixed CODEBOOK_SIZE and the
# per-modality variants are independent literals; CODEBOOK_SIZE and
# CODEBOOK_SIZE_TEXT currently happen to be equal.
CODEBOOK_SIZE: int = 2 ** 17        # 131072
CODEBOOK_SIZE_TEXT: int = 2 ** 17   # 131072
CODEBOOK_SIZE_IMAGE: int = 2 ** 16  # 65536
CODEBOOK_SIZE_AUDIO: int = 2 ** 16  # 65536
# Trigram stride policy
# Two separate stride values are kept, one per mode (per the names). How each
# is applied is decided by the consuming code, which is not shown here.
STRIDE_TRAINING: int = 1
STRIDE_INFERENCE: int = 3
# Graph
# Neighbor count K for the "T" graph (per the name).
# NOTE(review): presumably a k-nearest-neighbors setting -- confirm against
# the graph-construction code.
T_GRAPH_K_NEIGHBORS: int = 10
# MoE: global top-k active experts
# MOE_TOP_K (8) is the "top-k" from the header above, out of MOE_NUM_EXPERTS (64).
MOE_NUM_EXPERTS: int = 64
MOE_TOP_K: int = 8
MOE_CORE_RANK: int = 384
MOE_SHARED_INTER: int = 6_400
MOE_MAX_ITERS: int = 2

# Iteration cap with an ACT prefix rather than MOE.
# NOTE(review): presumably adaptive computation time -- confirm with the
# code that reads it.
ACT_MAX_ITERS: int = 4
# MemGram
# Two lists of integer constants named as primes.
# NOTE(review): presumably used as hash moduli / table sizes, and primality
# has not been re-checked for every entry here -- confirm before editing.
# The order and the number of entries may matter to consumers, so treat both
# lists as fixed unless the consuming code is checked.
#
# 16 values, all below 2**16 (range 64901..65071).
MEMGRAM_STRUCT_PRIMES = [
64901, 64919, 64921, 64927, 64937, 64951, 64969, 64997,
65003, 65011, 65027, 65029, 65033, 65053, 65063, 65071,
]
# 4 values just above 8000.
MEMGRAM_CONV_PRIMES = [8009, 8011, 8017, 8039]
# Widths for MemGram embeddings and keys (per the names).
MEMGRAM_EMBED_DIM = 64
MEMGRAM_KEY_DIM = 32
# KV / context cache
# NOTE(review): units (tokens vs. entries) are not stated in this file.
# Each primary value is followed directly by the second name bound to it, so
# changing the primary literal changes both names.
KV_CACHE_SIZE: int = 8_000_000
KV_LEDGER_SIZE = KV_CACHE_SIZE

SLIDING_WINDOW_MAX: int = 1_600_000
SLIDING_WINDOW_SIZE = SLIDING_WINDOW_MAX

# Independent of the two sizes above.
KQ_CACHE_SIZE: int = 8_192
# MLA Attention
# Stack shape: layer count and heads per layer (per the names).
MLA_N_LAYERS: int = 4
MLA_N_HEADS: int = 32

# Per-head widths. The two QK parts (NOPE + ROPE) sum to 96 + 32 = 128.
# NOTE(review): presumably the non-rotary and rotary slices of each
# query/key head -- confirm against the attention module.
MLA_QK_NOPE_HEAD_DIM: int = 96
MLA_QK_ROPE_HEAD_DIM: int = 32
MLA_V_HEAD_DIM: int = 96

# Widths for the "slide" and "full" paths (per the names); how they are used
# is not visible in this file.
MLA_SLIDE_DIM: int = 64
MLA_FULL_DIM: int = 32

# Kept as a float, matching the original literal.
MLA_ROPE_THETA: float = 10_000.0

ATTENTION_STRIDE: int = 8
# Special-token name -> token ID.
# IDs are assigned consecutively from 256 in the order the names are listed,
# giving 32 entries covering 256..287.
# Order is significant: inserting or reordering a name shifts the ID of every
# name after it, so only ever replace a name in place.
SPECIAL_VOCAB = {
    name: token_id
    for token_id, name in enumerate(
        (
            'PAD', 'BOS', 'EOS', 'STOP',
            'SYSTEM', 'USER', 'ASSISTANT',
            'SCRATCHPAD', 'PLAN', 'REFLECTION', 'SUMMARY',
            'ACTION', 'TOOL', 'TOOL_RESULT',
            'FIM_PREFIX', 'FIM_MIDDLE', 'FIM_SUFFIX',
            'SEARCH', 'CONTEXT', 'CITATION',
            'ERROR', 'FORMAT',
            'IMAGE', 'TEXT', 'AUDIO',
            'VIDEO', 'SPEAK', 'IMG_GEN',
            'RES1', 'RES2', 'RES3', 'RESERVED',
        ),
        start=256,
    )
}