# Model-wide hyper-parameter constants.
#
# Each constant lives on its own line: when these statements are run together
# on a single physical line the module does not parse, and the first `#`
# comments out every definition that follows it.

# Vocabulary
# VOCAB equals SPECIAL_TOKEN_MIN plus the 32 ids listed in SPECIAL_VOCAB
# (256 + 32 = 288).
# NOTE(review): ids below SPECIAL_TOKEN_MIN are presumably raw byte values -
# confirm against the tokenizer.
VOCAB = 288
AUDIO_VOCAB = 288
# NOTE(review): presumably a sample rate in Hz and a frames-per-second rate -
# confirm units with the audio front end.
AUDIO_SR = 16000
AUDIO_FRAME_RATE = 50
THRESHOLD = 0.05
# Smallest id used in SPECIAL_VOCAB.
SPECIAL_TOKEN_MIN = 256

# Core dimensions
HIDDEN_DIM = 5600
TRIGRAM_DIM = HIDDEN_DIM  # alias for backward compat
EMBEDDING_DIM = 1536
FFN_HIDDEN = 11200
CTX = 256

# VQ
CODEBOOK_DIM = 512
CODEBOOK_SIZE = 131072
CODEBOOK_SIZE_TEXT = 131072
CODEBOOK_SIZE_IMAGE = 65536
CODEBOOK_SIZE_AUDIO = 65536

# Trigram stride policy
STRIDE_TRAINING = 1
STRIDE_INFERENCE = 3

# Graph
T_GRAPH_K_NEIGHBORS = 10

# MoE: global top-k active experts
MOE_NUM_EXPERTS = 64
MOE_TOP_K = 8
MOE_CORE_RANK = 384
MOE_SHARED_INTER = 6400
ACT_MAX_ITERS = 4
MOE_MAX_ITERS = 2

# MemGram
# NOTE(review): the *_PRIMES lists are presumably hash-table moduli - confirm
# with the MemGram implementation before changing their length or values.
MEMGRAM_STRUCT_PRIMES = [
    64901, 64919, 64921, 64927,
    64937, 64951, 64969, 64997,
    65003, 65011, 65027, 65029,
    65033, 65053, 65063, 65071,
]
MEMGRAM_CONV_PRIMES = [8009, 8011, 8017, 8039]
MEMGRAM_EMBED_DIM = 64
MEMGRAM_KEY_DIM = 32

# KV / context cache
KV_CACHE_SIZE = 8_000_000
SLIDING_WINDOW_MAX = 1_600_000
# The next two names are bound to the values defined just above.
KV_LEDGER_SIZE = KV_CACHE_SIZE
SLIDING_WINDOW_SIZE = SLIDING_WINDOW_MAX
KQ_CACHE_SIZE = 8192

# MLA Attention
MLA_N_HEADS = 32
MLA_QK_NOPE_HEAD_DIM = 96
MLA_QK_ROPE_HEAD_DIM = 32
MLA_V_HEAD_DIM = 96
MLA_SLIDE_DIM = 64
MLA_FULL_DIM = 32
MLA_N_LAYERS = 4
MLA_ROPE_THETA = 10000.0
ATTENTION_STRIDE = 8

# Special-token name -> id. The ids are contiguous, running from
# SPECIAL_TOKEN_MIN (256) up to VOCAB - 1 (287).
SPECIAL_VOCAB = {
    'PAD': 256,
    'BOS': 257,
    'EOS': 258,
    'STOP': 259,
    'SYSTEM': 260,
    'USER': 261,
    'ASSISTANT': 262,
    'SCRATCHPAD': 263,
    'PLAN': 264,
    'REFLECTION': 265,
    'SUMMARY': 266,
    'ACTION': 267,
    'TOOL': 268,
    'TOOL_RESULT': 269,
    'FIM_PREFIX': 270,
    'FIM_MIDDLE': 271,
    'FIM_SUFFIX': 272,
    'SEARCH': 273,
    'CONTEXT': 274,
    'CITATION': 275,
    'ERROR': 276,
    'FORMAT': 277,
    'IMAGE': 278,
    'TEXT': 279,
    'AUDIO': 280,
    'VIDEO': 281,
    'SPEAK': 282,
    'IMG_GEN': 283,
    'RES1': 284,
    'RES2': 285,
    'RES3': 286,
    'RESERVED': 287,
}