```bash
#!/usr/bin/env bash
# Perplexity of MiniMax M2.5 Q8_0 over wiki.test.raw, with the process
# and its memory pinned to a single NUMA node via numactl.

SOCKET=${SOCKET:-0}   # NUMA node to bind to (CPU cores and memory)
echo "SOCKET is set to: $SOCKET"

model=/mnt/data/models/ubergarm/MiniMax-M2.5-GGUF/MiniMax-M2.5-Q8_0.gguf

numactl -N "$SOCKET" -m "$SOCKET" \
./build/bin/llama-perplexity \
    -m "$model" \
    -f wiki.test.raw \
    --seed 1337 \
    --ctx-size 512 \
    -ub 4096 -b 4096 \
    --numa numactl \
    --threads 96 \
    --threads-batch 128 \
    --validate-quants \
    --no-mmap
```
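Note that the chosen node must be able to hold the entire model: with `--no-mmap`, the full ~226 GiB of Q8_0 weights plus KV and compute buffers are allocated up front on node `$SOCKET`. A quick pre-flight check with stock numactl commands (a minimal sketch, not part of the original run):

```bash
# List NUMA nodes with their CPU ranges and free memory; the chosen
# node needs headroom above the ~226 GiB the Q8_0 weights occupy.
numactl --hardware

# Verify what a process bound the same way as the run would see.
numactl -N "$SOCKET" -m "$SOCKET" numactl --show
```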
Console output from the run:

```
SOCKET is set to: 0
main: build = 4190 (494d7062)
main: built with cc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 for x86_64-linux-gnu
main: seed = 1337
CPU: using device CPU - 0 MiB free
llama_model_loader: loaded meta data with 37 key-value pairs and 809 tensors from /mnt/data/models/ubergarm/MiniMax-M2.5-GGUF/MiniMax-M2.5-Q8_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = minimax-m2
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
llama_model_loader: - kv 5: general.name str = MiniMax M2.5
llama_model_loader: - kv 6: general.size_label str = 256x4.9B
llama_model_loader: - kv 7: general.license str = other
llama_model_loader: - kv 8: general.license.name str = modified-mit
llama_model_loader: - kv 9: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
llama_model_loader: - kv 10: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 11: minimax-m2.block_count u32 = 62
llama_model_loader: - kv 12: minimax-m2.context_length u32 = 196608
llama_model_loader: - kv 13: minimax-m2.embedding_length u32 = 3072
llama_model_loader: - kv 14: minimax-m2.feed_forward_length u32 = 1536
llama_model_loader: - kv 15: minimax-m2.attention.head_count u32 = 48
llama_model_loader: - kv 16: minimax-m2.attention.head_count_kv u32 = 8
llama_model_loader: - kv 17: minimax-m2.rope.freq_base f32 = 5000000.000000
llama_model_loader: - kv 18: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 19: minimax-m2.expert_count u32 = 256
llama_model_loader: - kv 20: minimax-m2.expert_used_count u32 = 8
llama_model_loader: - kv 21: minimax-m2.expert_gating_func u32 = 2
llama_model_loader: - kv 22: minimax-m2.attention.key_length u32 = 128
llama_model_loader: - kv 23: minimax-m2.attention.value_length u32 = 128
llama_model_loader: - kv 24: general.file_type u32 = 7
llama_model_loader: - kv 25: minimax-m2.expert_feed_forward_length u32 = 1536
llama_model_loader: - kv 26: minimax-m2.rope.dimension_count u32 = 64
llama_model_loader: - kv 27: general.quantization_version u32 = 2
llama_model_loader: - kv 28: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 29: tokenizer.ggml.pre str = minimax-m2
llama_model_loader: - kv 30: tokenizer.ggml.tokens arr[str,200064] = ["Δ", "Δ", "Δ", "Δ", "Δ", "Δ ", ...
llama_model_loader: - kv 31: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 32: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
llama_model_loader: - kv 33: tokenizer.ggml.bos_token_id u32 = 200034
llama_model_loader: - kv 34: tokenizer.ggml.eos_token_id u32 = 200020
llama_model_loader: - kv 35: tokenizer.ggml.unknown_token_id u32 = 200021
llama_model_loader: - kv 36: tokenizer.chat_template str = {# ----------βββ special token ...
llama_model_loader: - type f32: 373 tensors
llama_model_loader: - type q8_0: 436 tensors
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
load: printing all EOG tokens:
load: - 200004 ('<fim_pad>')
load: - 200005 ('<reponame>')
load: - 200020 ('[e~[')
load: special tokens cache size = 54
load: token to piece cache size = 1.3355 MB
llm_load_print_meta: format = GGUF V3 (latest)
llm_load_print_meta: arch = minimax-m2
llm_load_print_meta: n_ctx_train = 196608
llm_load_print_meta: n_embd = 3072
llm_load_print_meta: n_layer = 62
llm_load_print_meta: n_head = 48
llm_load_print_meta: n_head_kv = 8
llm_load_print_meta: n_rot = 64
llm_load_print_meta: n_swa = 0
llm_load_print_meta: n_swa_pattern = 1
llm_load_print_meta: n_embd_head_k = 128
llm_load_print_meta: n_embd_head_v = 128
llm_load_print_meta: n_gqa = 6
llm_load_print_meta: n_embd_k_gqa = 1024
llm_load_print_meta: n_embd_v_gqa = 1024
llm_load_print_meta: f_norm_eps = 0.0e+00
llm_load_print_meta: f_norm_rms_eps = 1.0e-06
llm_load_print_meta: f_clamp_kqv = 0.0e+00
llm_load_print_meta: f_max_alibi_bias = 0.0e+00
llm_load_print_meta: f_logit_scale = 0.0e+00
llm_load_print_meta: n_ff = 1536
llm_load_print_meta: n_expert = 256
llm_load_print_meta: n_expert_used = 8
llm_load_print_meta: causal attn = 1
llm_load_print_meta: pooling type = 0
llm_load_print_meta: rope type = 2
llm_load_print_meta: rope scaling = linear
llm_load_print_meta: freq_base_train = 5000000.0
llm_load_print_meta: freq_scale_train = 1
llm_load_print_meta: n_ctx_orig_yarn = 196608
llm_load_print_meta: rope_finetuned = unknown
llm_load_print_meta: ssm_d_conv = 0
llm_load_print_meta: ssm_d_inner = 0
llm_load_print_meta: ssm_d_state = 0
llm_load_print_meta: ssm_dt_rank = 0
llm_load_print_meta: model type = 230B.A10B
llm_load_print_meta: model ftype = Q8_0
llm_load_print_meta: model params = 228.690 B
llm_load_print_meta: model size = 226.431 GiB (8.505 BPW)
llm_load_print_meta: repeating layers = 225.215 GiB (8.505 BPW, 227.461 B parameters)
llm_load_print_meta: general.name = MiniMax M2.5
print_info: vocab type = BPE
print_info: n_vocab = 200064
print_info: n_merges = 199744
print_info: BOS token = 200034 ']~!b['
print_info: EOS token = 200020 '[e~['
print_info: UNK token = 200021 ']!d~['
print_info: LF token = 10 'Ċ'
print_info: FIM PRE token = 200001 '<fim_prefix>'
print_info: FIM SUF token = 200003 '<fim_suffix>'
print_info: FIM MID token = 200002 '<fim_middle>'
print_info: FIM PAD token = 200004 '<fim_pad>'
print_info: FIM REP token = 200005 '<reponame>'
print_info: EOG token = 200004 '<fim_pad>'
print_info: EOG token = 200005 '<reponame>'
print_info: EOG token = 200020 '[e~['
print_info: max token length = 256
llm_load_tensors: ggml ctx size = 0.35 MiB
llm_load_tensors: offloading 0 repeating layers to GPU
llm_load_tensors: offloaded 0/63 layers to GPU
llm_load_tensors: CPU buffer size = 231865.49 MiB
....................................................................................................
llama_new_context_with_model: n_ctx = 4096
llama_new_context_with_model: n_batch = 4096
llama_new_context_with_model: n_ubatch = 4096
llama_new_context_with_model: flash_attn = 1
llama_new_context_with_model: attn_max_b = 0
llama_new_context_with_model: fused_moe = 1
llama_new_context_with_model: grouped er = 0
llama_new_context_with_model: fused_up_gate = 1
llama_new_context_with_model: fused_mmad = 1
llama_new_context_with_model: rope_cache = 0
llama_new_context_with_model: graph_reuse = 1
llama_new_context_with_model: k_cache_hadam = 0
llama_new_context_with_model: split_mode_graph_scheduling = 0
llama_new_context_with_model: reduce_type = f16
llama_new_context_with_model: sched_async = 0
llama_new_context_with_model: ser = -1, 0
llama_new_context_with_model: freq_base = 5000000.0
llama_new_context_with_model: freq_scale = 1
llama_kv_cache_init: CPU KV buffer size = 992.00 MiB
llama_new_context_with_model: KV self size = 992.00 MiB, K (f16): 496.00 MiB, V (f16): 496.00 MiB
llama_new_context_with_model: CPU output buffer size = 6.11 MiB
llama_new_context_with_model: CPU compute buffer size = 3222.00 MiB
llama_new_context_with_model: graph nodes = 2361
llama_new_context_with_model: graph splits = 1
XXXXXXXXXXXXXXXXXXXXX Setting only active experts offload
system_info: n_threads = 96 (n_threads_batch = 128) / 512 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | FMA = 1 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 |
perplexity: tokenizing the input ..
perplexity: tokenization took 667.124 ms
perplexity: calculating perplexity over 552 chunks, n_ctx=512, batch_size=4096, n_seq=8
perplexity: 9.91 seconds per pass - ETA 11.40 minutes
```
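The chunk accounting follows directly from the flags: each pass evaluates n_seq = 4096 / 512 = 8 chunks at once, and 552 chunks × 512 tokens = 282,624 tokens in total, matching the prompt-eval count in the timing summary below. A throwaway sanity check of that arithmetic:

```bash
# Sanity-check the chunk arithmetic reported above (values from the log).
n_tokens=282624   # tokens evaluated = 552 chunks x 512
n_ctx=512         # --ctx-size: tokens per perplexity chunk
n_batch=4096      # -b/-ub: tokens per forward pass
echo "chunks: $((n_tokens / n_ctx))"   # -> 552
echo "n_seq:  $((n_batch / n_ctx))"    # -> 8 chunks per pass
```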
```
===================================== llama_new_context_with_model: f16
======================================= HAVE_FANCY_SIMD is defined
[1]3.4569,[2]4.6901,[3]4.2847,[4]4.7391,[5]5.1472,[6]5.6953,[7]6.2568,[8]7.2742,[9]7.7173,[10]7.8740,[11]8.0505,[12]8.4482,[13]8.5166,[14]8.3752,[15]8.5316,[16]8.1556,[17]8.3451,[18]8.3431,[19]8.1903,[20]7.9329,[21]7.8668,[22]7.6172,[23]7.3654,[24]7.2336,[25]6.8601,[26]6.6704,[27]6.7957,[28]6.7809,[29]6.8151,[30]6.8140,[31]6.7673,[32]6.8183,[33]6.9399,[34]7.0939,[35]7.2096,[36]7.1694,[37]7.2248,[38]7.2967,[39]7.3159,[40]7.4139,[41]7.4850,[42]7.4500,[43]7.4671,[44]7.6211,[45]7.7269,[46]7.7055,[47]7.6796,[48]7.6968,[49]7.7137,[50]7.7986,[51]7.8418,[52]7.8477,[53]7.9147,[54]7.9275,[55]7.9665,[56]7.9250,[57]7.9530,[58]7.9343,[59]7.9752,[60]8.0366,[61]8.1361,[62]8.2119,[63]8.2518,[64]8.2500,[65]8.2660,[66]8.2524,[67]8.2836,[68]8.3390,[69]8.3717,[70]8.3718,[71]8.3193,[72]8.3187,[73]8.3545,[74]8.3552,[75]8.2182,[76]8.1613,[77]8.1709,[78]8.1860,[79]8.1952,[80]8.1832,[81]8.2352,[82]8.2288,[83]8.2247,[84]8.2451,[85]8.2448,[86]8.3252,[87]8.3205,[88]8.3524,[89]8.3645,[90]8.3685,[91]8.3605,[92]8.3303,[93]8.3398,[94]8.3377,[95]8.3861,[96]8.3921,[97]8.4146,[98]8.4191,[99]8.4190,[100]8.4065,[101]8.4828,[102]8.5297,[103]8.5955,[104]8.6493,[105]8.7320,[106]8.7642,[107]8.7265,[108]8.8124,[109]8.8555,[110]8.8182,[111]8.7656,[112]8.7575,[113]8.7083,[114]8.7045,[115]8.6543,[116]8.6284,[117]8.5860,[118]8.5628,[119]8.5118,[120]8.4689,[121]8.4271,[122]8.3525,[123]8.3013,[124]8.2537,[125]8.2056,[126]8.1823,[127]8.1779,[128]8.1872,[129]8.1822,[130]8.1806,[131]8.1835,[132]8.2102,[133]8.2171,[134]8.2274,[135]8.2183,[136]8.2096,[137]8.2083,[138]8.1621,[139]8.1344,[140]8.0953,[141]8.0694,[142]8.0259,[143]7.9821,[144]7.9589,[145]7.9428,[146]7.9117,[147]7.8811,[148]7.8320,[149]7.7962,[150]7.7714,[151]7.7517,[152]7.7275,[153]7.7181,[154]7.7018,[155]7.7023,[156]7.6867,[157]7.6940,[158]7.7031,[159]7.7110,[160]7.7364,[161]7.7601,[162]7.8087,[163]7.8496,[164]7.8841,[165]7.9442,[166]7.9704,[167]8.0103,[168]8.0401,[169]8.0605,[170]8.0553,[171]8.0562,[172]8.0836,[173]8.0489,[174]8.0550,[175]8.0560,[176]8.0644,[177]8.0699,[178]8.0739,[179]8.1127,[180]8.1433,[181]8.1664,[182]8.1687,[183]8.1974,[184]8.2444,[185]8.2755,[186]8.2965,[187]8.3041,[188]8.3028,[189]8.2832,[190]8.2820,[191]8.2673,[192]8.2946,[193]8.3183,[194]8.3404,[195]8.3360,[196]8.3489,[197]8.3276,[198]8.3618,[199]8.3364,[200]8.3228,[201]8.3083,[202]8.2933,[203]8.2823,[204]8.2825,[205]8.2922,[206]8.3100,[207]8.2926,[208]8.2565,[209]8.2424,[210]8.2417,[211]8.2319,[212]8.2336,[213]8.2251,[214]8.1866,[215]8.1615,[216]8.1506,[217]8.1317,[218]8.1186,[219]8.1176,[220]8.1166,[221]8.1116,[222]8.0821,[223]8.0691,[224]8.0622,[225]8.0547,[226]8.0580,[227]8.0629,[228]8.0678,[229]8.0595,[230]8.0797,[231]8.0819,[232]8.1114,[233]8.1334,[234]8.1467,[235]8.1570,[236]8.1757,[237]8.1948,[238]8.1978,[239]8.2142,[240]8.2463,[241]8.2647,[242]8.2651,[243]8.2773,[244]8.2703,[245]8.2343,[246]8.2135,[247]8.1931,[248]8.1815,[249]8.1754,[250]8.1825,[251]8.1824,[252]8.1715,[253]8.1577,[254]8.1616,[255]8.1499,[256]8.1351,[257]8.1252,[258]8.1118,[259]8.1096,[260]8.1046,[261]8.0869,[262]8.0809,[263]8.0660,[264]8.0607,[265]8.0556,[266]8.0328,[267]8.0320,[268]7.9985,[269]7.9874,[270]7.9841,[271]7.9767,[272]7.9658,[273]7.9656,[274]7.9800,[275]7.9858,[276]7.9957,[277]8.0033,[278]8.0102,[279]8.0229,[280]8.0308,[281]8.0459,[282]8.0446,[283]8.0395,[284]8.0424,[285]8.0406,[286]8.0316,[287]8.0244,[288]8.0382,[289]8.0525,[290]8.0481,[291]8.0470,[292]8.0484,[293]8.0493,[294]8.0546,[295]8.0634,[296]8.0639,[297]8.0677,[298]8.0678,[299]8.0666,[300]8.0815,[301]8.0868,[302]8.0789,[303]8.0775,[304]8.0673,[305]8.0738,[306]8.0837,[307]8.0945,[308]8.1146,[309]8.1133,[310]8.1264,[311]8.1190,[312]8.1208,[313]8.1118,[314]8.1064,[315]8.1123,[316]8.1021,[317]8.0996,[318]8.1084,[319]8.1017,[320]8.1190,[321]8.1171,[322]8.1178,[323]8.1154,[324]8.1121,[325]8.1085,[326]8.1178,[327]8.1242,[328]8.1209,[329]8.1202,[330]8.1107,[331]8.1017,[332]8.0911,[333]8.0960,[334]8.0944,[335]8.0843,[336]8.0986,[337]8.1075,[338]8.1188,[339]8.1112,[340]8.1099,[341]8.1037,[342]8.1133,[343]8.1000,[344]8.0901,[345]8.0995,[346]8.1246,[347]8.1483,[348]8.1738,[349]8.1818,[350]8.2029,[351]8.2243,[352]8.2383,[353]8.2528,[354]8.2574,[355]8.2662,[356]8.2763,[357]8.2709,[358]8.2881,[359]8.3034,[360]8.3121,[361]8.3254,[362]8.3355,[363]8.3493,[364]8.3605,[365]8.3852,[366]8.3967,[367]8.4003,[368]8.4062,[369]8.4130,[370]8.4380,[371]8.4535,[372]8.4571,[373]8.4459,[374]8.4423,[375]8.4463,[376]8.4556,[377]8.4588,[378]8.4674,[379]8.4737,[380]8.4793,[381]8.4965,[382]8.4908,[383]8.4574,[384]8.4464,[385]8.4384,[386]8.4426,[387]8.4502,[388]8.4473,[389]8.4527,[390]8.4591,[391]8.4466,[392]8.4325,[393]8.4279,[394]8.4138,[395]8.4003,[396]8.3979,[397]8.3935,[398]8.3763,[399]8.3616,[400]8.3509,[401]8.3421,[402]8.3316,[403]8.3174,[404]8.3057,[405]8.3048,[406]8.3151,[407]8.3259,[408]8.3168,[409]8.3094,[410]8.3158,[411]8.3052,[412]8.3080,[413]8.3136,[414]8.3089,[415]8.3117,[416]8.3055,[417]8.2999,[418]8.2912,[419]8.2900,[420]8.2828,[421]8.2795,[422]8.2766,[423]8.2750,[424]8.2662,[425]8.2550,[426]8.2405,[427]8.2368,[428]8.2268,[429]8.2140,[430]8.1998,[431]8.1900,[432]8.1966,[433]8.2096,[434]8.2207,[435]8.2321,[436]8.2290,[437]8.2269,[438]8.2267,[439]8.2336,[440]8.2332,[441]8.2352,[442]8.2392,[443]8.2518,[444]8.2605,[445]8.2607,[446]8.2670,[447]8.2617,[448]8.2626,[449]8.2546,[450]8.2621,[451]8.2711,[452]8.2717,[453]8.2715,[454]8.2659,[455]8.2688,[456]8.2814,[457]8.2806,[458]8.2847,[459]8.2960,[460]8.3020,[461]8.2998,[462]8.3053,[463]8.3036,[464]8.3055,[465]8.3034,[466]8.2977,[467]8.3007,[468]8.2957,[469]8.2919,[470]8.2939,[471]8.3002,[472]8.3102,[473]8.2992,[474]8.3024,[475]8.2990,[476]8.3042,[477]8.3150,[478]8.3191,[479]8.3278,[480]8.3388,[481]8.3416,[482]8.3416,[483]8.3504,[484]8.3544,[485]8.3496,[486]8.3459,[487]8.3402,[488]8.3359,[489]8.3351,[490]8.3268,[491]8.3305,[492]8.3314,[493]8.3449,[494]8.3316,[495]8.3328,[496]8.3326,[497]8.3355,[498]8.3440,[499]8.3491,[500]8.3420,[501]8.3342,[502]8.3259,[503]8.3350,[504]8.3333,[505]8.3348,[506]8.3392,[507]8.3351,[508]8.3374,[509]8.3488,[510]8.3463,[511]8.3593,[512]8.3605,[513]8.3510,[514]8.3548,[515]8.3591,[516]8.3668,[517]8.3613,[518]8.3446,[519]8.3424,[520]8.3378,[521]8.3300,[522]8.3238,[523]8.2982,[524]8.2927,[525]8.2929,[526]8.2956,[527]8.3039,[528]8.3050,[529]8.3145,[530]8.3249,[531]8.3347,[532]8.3437,[533]8.3527,[534]8.3680,[535]8.3650,[536]8.3640,[537]8.3542,[538]8.3489,[539]8.3406,[540]8.3377,[541]8.3408,[542]8.3405,[543]8.3397,[544]8.3362,[545]8.3378,[546]8.3344,[547]8.3331,[548]8.3439,[549]8.3497,[550]8.3620,[551]8.3603,[552]8.3590,
llama_print_timings: load time = 53324.72 ms
llama_print_timings: sample time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_print_timings: prompt eval time = 548438.07 ms / 282624 tokens ( 1.94 ms per token, 515.33 tokens per second)
llama_print_timings: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_print_timings: total time = 559799.36 ms / 282625 tokens
Final estimate: PPL over 552 chunks for n_ctx=512 = 8.3590 +/- 0.06673
```
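The bracketed series is the running perplexity estimate after each chunk, so its last entry matches the final line, and the reported +/- 0.06673 is the standard error of that estimate. If the run is captured to a file, the series is easy to recover; a minimal sketch, assuming the output was saved to `ppl.log` (hypothetical filename):

```bash
# Pull the running [chunk]ppl values back out of a saved log; the last
# one should equal the reported final estimate (here [552]8.3590).
grep -oE '\[[0-9]+\][0-9]+\.[0-9]+' ppl.log | tail -n 1
```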