Qwen3-Coder-Next-GGUF/logs/perplexity-Qwen3-Coder-Next-IQ4_KSS.log
SOCKET is set to: 0
main: build = 4211 (b2cb4512)
main: built with cc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 for x86_64-linux-gnu
main: seed = 1337
CPU: using device CPU - 0 MiB free
llama_model_loader: loaded meta data with 47 key-value pairs and 843 tensors from /mnt/data/models/ubergarm/Qwen3-Coder-Next-GGUF/Qwen3-Coder-Next-IQ4_KSS.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = qwen3next
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
llama_model_loader: - kv 5: general.name str = Qwen3 Coder Next
llama_model_loader: - kv 6: general.size_label str = 512x2.5B
llama_model_loader: - kv 7: general.license str = apache-2.0
llama_model_loader: - kv 8: general.license.link str = https://huggingface.co/Qwen/Qwen3-Cod...
llama_model_loader: - kv 9: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 10: qwen3next.block_count u32 = 48
llama_model_loader: - kv 11: qwen3next.context_length u32 = 262144
llama_model_loader: - kv 12: qwen3next.embedding_length u32 = 2048
llama_model_loader: - kv 13: qwen3next.feed_forward_length u32 = 5120
llama_model_loader: - kv 14: qwen3next.attention.head_count u32 = 16
llama_model_loader: - kv 15: qwen3next.attention.head_count_kv u32 = 2
llama_model_loader: - kv 16: qwen3next.rope.freq_base f32 = 5000000.000000
llama_model_loader: - kv 17: qwen3next.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 18: qwen3next.expert_count u32 = 512
llama_model_loader: - kv 19: qwen3next.expert_used_count u32 = 10
llama_model_loader: - kv 20: qwen3next.attention.key_length u32 = 256
llama_model_loader: - kv 21: qwen3next.attention.value_length u32 = 256
llama_model_loader: - kv 22: general.file_type u32 = 148
llama_model_loader: - kv 23: qwen3next.expert_feed_forward_length u32 = 512
llama_model_loader: - kv 24: qwen3next.expert_shared_feed_forward_length u32 = 512
llama_model_loader: - kv 25: qwen3next.ssm.conv_kernel u32 = 4
llama_model_loader: - kv 26: qwen3next.ssm.state_size u32 = 128
llama_model_loader: - kv 27: qwen3next.ssm.group_count u32 = 16
llama_model_loader: - kv 28: qwen3next.ssm.time_step_rank u32 = 32
llama_model_loader: - kv 29: qwen3next.ssm.inner_size u32 = 4096
llama_model_loader: - kv 30: qwen3next.full_attention_interval u32 = 4
llama_model_loader: - kv 31: qwen3next.rope.dimension_count u32 = 64
llama_model_loader: - kv 32: general.quantization_version u32 = 2
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 34: tokenizer.ggml.pre str = qwen2
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,151936] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
llama_model_loader: - kv 38: tokenizer.ggml.eos_token_id u32 = 151645
llama_model_loader: - kv 39: tokenizer.ggml.padding_token_id u32 = 151643
llama_model_loader: - kv 40: tokenizer.ggml.bos_token_id u32 = 151643
llama_model_loader: - kv 41: tokenizer.ggml.add_bos_token bool = false
llama_model_loader: - kv 42: tokenizer.chat_template str = {% macro render_extra_keys(json_dict,...
llama_model_loader: - kv 43: quantize.imatrix.file str = /mnt/data/models/ubergarm/Qwen3-Coder...
llama_model_loader: - kv 44: quantize.imatrix.dataset str = ubergarm-imatrix-calibration-corpus-v...
llama_model_loader: - kv 45: quantize.imatrix.entries_count i32 = 577
llama_model_loader: - kv 46: quantize.imatrix.chunks_count i32 = 840
llama_model_loader: - type f32: 361 tensors
llama_model_loader: - type q8_0: 336 tensors
llama_model_loader: - type iq6_k: 2 tensors
llama_model_loader: - type iq4_ks: 48 tensors
llama_model_loader: - type iq4_kss: 96 tensors
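Note: as a quick sanity check (a hypothetical Python snippet, not part of the original log), the per-type tensor counts above should add up to the 843 tensors announced by the loader:

    # Per-quant-type tensor counts printed by llama_model_loader above;
    # their sum should match the "843 tensors" in the loader header line.
    counts = {"f32": 361, "q8_0": 336, "iq6_k": 2, "iq4_ks": 48, "iq4_kss": 96}
    assert sum(counts.values()) == 843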
load: printing all EOG tokens:
load: - 151643 ('<|endoftext|>')
load: - 151645 ('<|im_end|>')
load: - 151662 ('<|fim_pad|>')
load: - 151663 ('<|repo_name|>')
load: - 151664 ('<|file_sep|>')
load: special tokens cache size = 26
load: token to piece cache size = 0.9311 MB
llm_load_print_meta: format = GGUF V3 (latest)
llm_load_print_meta: arch = qwen3next
llm_load_print_meta: n_ctx_train = 262144
llm_load_print_meta: n_embd = 2048
llm_load_print_meta: n_layer = 48
llm_load_print_meta: n_head = 16
llm_load_print_meta: n_head_kv = 2
llm_load_print_meta: n_rot = 64
llm_load_print_meta: n_swa = 0
llm_load_print_meta: n_swa_pattern = 1
llm_load_print_meta: n_embd_head_k = 256
llm_load_print_meta: n_embd_head_v = 256
llm_load_print_meta: n_gqa = 8
llm_load_print_meta: n_embd_k_gqa = 512
llm_load_print_meta: n_embd_v_gqa = 512
llm_load_print_meta: f_norm_eps = 0.0e+00
llm_load_print_meta: f_norm_rms_eps = 1.0e-06
llm_load_print_meta: f_clamp_kqv = 0.0e+00
llm_load_print_meta: f_max_alibi_bias = 0.0e+00
llm_load_print_meta: f_logit_scale = 0.0e+00
llm_load_print_meta: n_ff = 5120
llm_load_print_meta: n_expert = 512
llm_load_print_meta: n_expert_used = 10
llm_load_print_meta: causal attn = 1
llm_load_print_meta: pooling type = 0
llm_load_print_meta: rope type = 2
llm_load_print_meta: rope scaling = linear
llm_load_print_meta: freq_base_train = 5000000.0
llm_load_print_meta: freq_scale_train = 1
llm_load_print_meta: n_ctx_orig_yarn = 262144
llm_load_print_meta: rope_finetuned = unknown
llm_load_print_meta: ssm_d_conv = 4
llm_load_print_meta: ssm_d_inner = 4096
llm_load_print_meta: ssm_d_state = 128
llm_load_print_meta: ssm_dt_rank = 32
llm_load_print_meta: model type = 80B.A3B
llm_load_print_meta: model ftype = IQ4_KSS - 4.0 bpw
llm_load_print_meta: model params = 79.674 B
llm_load_print_meta: model size = 39.377 GiB (4.245 BPW)
llm_load_print_meta: repeating layers = 38.897 GiB (4.227 BPW, 79.052 B parameters)
llm_load_print_meta: general.name = Qwen3 Coder Next
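Note: the reported bits-per-weight follows directly from the file size and parameter count; a minimal arithmetic check (hypothetical Python, not from the log):

    # BPW = file size in bits / parameter count.
    size_gib = 39.377                      # "model size" reported above
    params   = 79.674e9                    # "model params" reported above
    print(size_gib * 2**30 * 8 / params)   # ~4.245 BPW, matching the log
    print(size_gib * 1024)                 # ~40322 MiB, matching the CPU buffer size below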
print_info: vocab type = BPE
print_info: n_vocab = 151936
print_info: n_merges = 151387
print_info: BOS token = 151643 '<|endoftext|>'
print_info: EOS token = 151645 '<|im_end|>'
print_info: EOT token = 151645 '<|im_end|>'
print_info: PAD token = 151643 '<|endoftext|>'
print_info: LF token = 198 'Ċ'
print_info: FIM PRE token = 151659 '<|fim_prefix|>'
print_info: FIM SUF token = 151661 '<|fim_suffix|>'
print_info: FIM MID token = 151660 '<|fim_middle|>'
print_info: FIM PAD token = 151662 '<|fim_pad|>'
print_info: FIM REP token = 151663 '<|repo_name|>'
print_info: FIM SEP token = 151664 '<|file_sep|>'
print_info: EOG token = 151643 '<|endoftext|>'
print_info: EOG token = 151645 '<|im_end|>'
print_info: EOG token = 151662 '<|fim_pad|>'
print_info: EOG token = 151663 '<|repo_name|>'
print_info: EOG token = 151664 '<|file_sep|>'
print_info: max token length = 256
llm_load_tensors: ggml ctx size = 0.35 MiB
llm_load_tensors: offloading 0 repeating layers to GPU
llm_load_tensors: offloaded 0/49 layers to GPU
llm_load_tensors: CPU buffer size = 40322.46 MiB
....................................................................................................
llama_init_from_model: n_ctx = 2048
llama_init_from_model: n_batch = 2048
llama_init_from_model: n_ubatch = 512
llama_init_from_model: flash_attn = 1
llama_init_from_model: attn_max_b = 0
llama_init_from_model: fused_moe = 1
llama_init_from_model: grouped er = 0
llama_init_from_model: fused_up_gate = 1
llama_init_from_model: fused_mmad = 1
llama_init_from_model: rope_cache = 0
llama_init_from_model: graph_reuse = 1
llama_init_from_model: k_cache_hadam = 0
llama_init_from_model: split_mode_graph_scheduling = 0
llama_init_from_model: reduce_type = f16
llama_init_from_model: sched_async = 0
llama_init_from_model: ser = -1, 0
llama_init_from_model: freq_base = 5000000.0
llama_init_from_model: freq_scale = 1
llama_kv_cache_init: CPU KV buffer size = 349.50 MiB
llama_init_from_model: KV self size = 48.00 MiB, K (f16): 24.00 MiB, V (f16): 24.00 MiB
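Note: the 48 MiB KV size is consistent with qwen3next's hybrid attention layout. With full_attention_interval = 4, only 48 / 4 = 12 of the 48 layers keep a conventional KV cache; the remainder of the 349.50 MiB CPU KV buffer presumably holds the recurrent/conv state of the 36 linear-attention layers (an assumption, not stated in the log). A back-of-the-envelope check in Python:

    # Per full-attention layer, the K cache is n_ctx * n_embd_k_gqa * sizeof(f16);
    # the V cache is the same size, giving 48 MiB total for K + V.
    n_ctx, n_embd_k_gqa, n_layer, interval = 2048, 512, 48, 4
    full_attn_layers = n_layer // interval                  # 12 layers
    k_bytes = n_ctx * n_embd_k_gqa * 2 * full_attn_layers   # f16 = 2 bytes
    print(k_bytes / 2**20)                                  # 24.0 MiB, matching "K (f16): 24.00 MiB"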
llama_init_from_model: CPU output buffer size = 2.32 MiB
llama_init_from_model: CPU compute buffer size = 300.75 MiB
llama_init_from_model: graph nodes = 12382
llama_init_from_model: graph splits = 1
llama_init_from_model: enabling only_active_experts scheduling
system_info: n_threads = 96 (n_threads_batch = 128) / 512 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | FMA = 1 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 |
perplexity: tokenizing the input ..
perplexity: tokenization took 390.895 ms
perplexity: calculating perplexity over 584 chunks, n_ctx=512, batch_size=2048, n_seq=4
perplexity: 3.57 seconds per pass - ETA 8.67 minutes
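Note: 584 chunks x 512 tokens accounts exactly for the 299008 tokens reported in the timings below. A minimal sketch of a chunked perplexity loop (hypothetical Python over per-token log-probabilities; the actual tool scores only part of each chunk, so this illustrates the idea rather than the exact implementation):

    import math

    def chunked_ppl(logprobs, n_ctx=512):
        """logprobs[i] = log p(token i | earlier tokens of its chunk)."""
        nll = count = 0
        for start in range(0, len(logprobs) - n_ctx + 1, n_ctx):
            for lp in logprobs[start:start + n_ctx]:
                nll -= lp
                count += 1
        return math.exp(nll / count)  # running estimates like "[584]8.3069" below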
===================================== llama_init_from_model: f16
======================================= HAVE_FANCY_SIMD is defined
[1]4.7708,[2]6.6455,[3]5.6847,[4]4.9300,[5]4.8482,[6]4.9616,[7]5.0382,[8]5.1594,[9]5.1162,[10]5.1752,[11]5.1005,[12]5.3326,[13]5.7391,[14]5.6986,[15]5.8049,[16]6.1672,[17]5.9825,[18]6.1662,[19]6.2517,[20]6.2809,[21]6.1879,[22]6.3014,[23]6.1039,[24]5.8551,[25]5.7720,[26]5.6545,[27]5.5737,[28]5.5205,[29]5.5937,[30]5.5868,[31]5.5772,[32]5.6498,[33]5.5949,[34]5.6422,[35]5.7332,[36]5.8198,[37]5.9514,[38]6.0595,[39]6.0941,[40]6.1921,[41]6.2363,[42]6.2663,[43]6.3373,[44]6.3414,[45]6.3754,[46]6.4259,[47]6.5950,[48]6.7029,[49]6.7117,[50]6.7649,[51]6.8072,[52]6.8791,[53]6.9402,[54]6.9868,[55]7.0063,[56]7.0823,[57]7.0897,[58]7.1329,[59]7.1791,[60]7.2232,[61]7.2709,[62]7.3073,[63]7.3673,[64]7.4293,[65]7.4972,[66]7.5685,[67]7.6299,[68]7.6186,[69]7.6415,[70]7.6493,[71]7.6847,[72]7.7552,[73]7.8107,[74]7.8395,[75]7.8152,[76]7.8270,[77]7.8976,[78]7.9346,[79]7.8476,[80]7.8241,[81]7.8209,[82]7.8591,[83]7.8315,[84]7.8261,[85]7.8472,[86]7.9303,[87]7.9743,[88]7.9955,[89]8.0109,[90]8.0011,[91]8.0545,[92]8.0347,[93]8.0797,[94]8.0940,[95]8.0805,[96]8.0713,[97]8.0645,[98]8.1005,[99]8.0796,[100]8.1641,[101]8.2137,[102]8.2089,[103]8.2180,[104]8.2042,[105]8.2038,[106]8.2012,[107]8.2370,[108]8.2704,[109]8.3100,[110]8.3700,[111]8.4787,[112]8.4877,[113]8.4508,[114]8.5037,[115]8.5319,[116]8.4806,[117]8.4850,[118]8.4776,[119]8.4453,[120]8.4664,[121]8.4540,[122]8.4422,[123]8.4035,[124]8.3625,[125]8.3447,[126]8.3292,[127]8.2807,[128]8.2685,[129]8.2353,[130]8.1914,[131]8.1579,[132]8.1311,[133]8.1250,[134]8.1375,[135]8.1307,[136]8.1300,[137]8.1008,[138]8.0728,[139]8.0903,[140]8.0761,[141]8.0732,[142]8.0946,[143]8.1000,[144]8.1349,[145]8.1126,[146]8.0758,[147]8.0396,[148]8.0002,[149]7.9802,[150]7.9379,[151]7.9272,[152]7.9191,[153]7.9154,[154]7.8789,[155]7.8823,[156]7.8432,[157]7.8223,[158]7.7976,[159]7.7800,[160]7.7416,[161]7.7246,[162]7.7171,[163]7.6999,[164]7.7098,[165]7.7018,[166]7.6948,[167]7.6935,[168]7.7147,[169]7.7185,[170]7.7491,[171]7.7541,[172]7.7784,[173]7.8294,[174]7.8438,[175]7.8979,[176]7.9243,[177]7.9767,[178]8.0162,[179]8.0179,[180]7.9923,[181]7.9633,[182]7.9752,[183]7.9436,[184]7.9282,[185]7.9042,[186]7.8789,[187]7.8603,[188]7.8537,[189]7.8686,[190]7.8953,[191]7.9074,[192]7.9170,[193]7.9192,[194]7.9375,[195]7.9540,[196]7.9615,[197]7.9671,[198]7.9489,[199]7.9394,[200]7.9261,[201]7.9266,[202]7.9436,[203]7.9686,[204]7.9891,[205]8.0084,[206]8.0125,[207]8.0387,[208]8.0273,[209]8.0280,[210]8.0243,[211]8.0272,[212]8.0306,[213]8.0277,[214]8.0158,[215]8.0004,[216]7.9936,[217]8.0013,[218]7.9968,[219]7.9762,[220]7.9453,[221]7.9323,[222]7.9186,[223]7.9170,[224]7.9242,[225]7.9042,[226]7.8961,[227]7.8846,[228]7.8597,[229]7.8333,[230]7.8165,[231]7.7974,[232]7.7846,[233]7.7802,[234]7.7780,[235]7.7770,[236]7.7625,[237]7.7533,[238]7.7389,[239]7.7335,[240]7.7430,[241]7.7529,[242]7.7647,[243]7.7618,[244]7.7762,[245]7.7791,[246]7.8023,[247]7.8114,[248]7.8167,[249]7.8264,[250]7.8289,[251]7.8477,[252]7.8657,[253]7.9007,[254]7.9252,[255]7.9294,[256]7.9466,[257]7.9614,[258]7.9483,[259]7.9333,[260]7.9183,[261]7.8962,[262]7.8837,[263]7.8777,[264]7.8748,[265]7.8830,[266]7.8881,[267]7.8878,[268]7.8785,[269]7.8835,[270]7.8793,[271]7.8743,[272]7.8703,[273]7.8680,[274]7.8638,[275]7.8589,[276]7.8453,[277]7.8457,[278]7.8447,[279]7.8363,[280]7.8316,[281]7.8266,[282]7.8243,[283]7.8001,[284]7.7717,[285]7.7814,[286]7.7652,[287]7.7490,[288]7.7467,[289]7.7427,[290]7.7647,[291]7.7694,[292]7.7681,[293]7.7700,[294]7.7871,[295]7.7983,[296]7.8088,[297]7.8317,[298]7.8294,[299]7.8206,[300]7.8214,[301]7.8153,[302]7.8173,[303]7.8125,[304]7.8376,[305]7.8428,[306]7.8415,[307]7.8455,[308]7.8452,[309]7.8442,[310]7.8499,[311]7.8534,[312]7.8437,[313]7.8385,[314]7.8450,[315]7.8327,[316]7.8349,[317]7.8499,[318]7.8570,[319]7.8505,[320]7.8528,[321]7.8423,[322]7.8527,[323]7.8618,[324]7.8682,[325]7.8885,[326]7.8866,[327]7.8754,[328]7.8789,[329]7.8652,[330]7.8568,[331]7.8508,[332]7.8505,[333]7.8524,[334]7.8493,[335]7.8398,[336]7.8421,[337]7.8490,[338]7.8612,[339]7.8580,[340]7.8533,[341]7.8457,[342]7.8455,[343]7.8444,[344]7.8513,[345]7.8598,[346]7.8563,[347]7.8430,[348]7.8450,[349]7.8427,[350]7.8326,[351]7.8323,[352]7.8360,[353]7.8358,[354]7.8259,[355]7.8389,[356]7.8488,[357]7.8537,[358]7.8448,[359]7.8493,[360]7.8489,[361]7.8587,[362]7.8503,[363]7.8448,[364]7.8520,[365]7.8703,[366]7.8962,[367]7.9124,[368]7.9423,[369]7.9580,[370]7.9734,[371]7.9971,[372]8.0169,[373]8.0274,[374]8.0364,[375]8.0557,[376]8.0694,[377]8.0815,[378]8.0946,[379]8.1071,[380]8.1237,[381]8.1408,[382]8.1519,[383]8.1605,[384]8.1724,[385]8.1982,[386]8.2193,[387]8.2191,[388]8.2206,[389]8.2297,[390]8.2537,[391]8.2719,[392]8.2657,[393]8.2648,[394]8.2577,[395]8.2585,[396]8.2668,[397]8.2752,[398]8.2817,[399]8.2895,[400]8.3013,[401]8.3027,[402]8.3024,[403]8.2938,[404]8.2712,[405]8.2583,[406]8.2576,[407]8.2657,[408]8.2750,[409]8.2769,[410]8.2870,[411]8.3046,[412]8.3100,[413]8.3086,[414]8.3064,[415]8.3013,[416]8.2942,[417]8.2989,[418]8.3078,[419]8.3119,[420]8.3128,[421]8.3198,[422]8.3088,[423]8.3079,[424]8.3108,[425]8.3141,[426]8.3153,[427]8.3221,[428]8.3369,[429]8.3446,[430]8.3405,[431]8.3363,[432]8.3409,[433]8.3446,[434]8.3456,[435]8.3543,[436]8.3482,[437]8.3534,[438]8.3553,[439]8.3501,[440]8.3548,[441]8.3546,[442]8.3523,[443]8.3448,[444]8.3471,[445]8.3381,[446]8.3400,[447]8.3342,[448]8.3285,[449]8.3229,[450]8.3289,[451]8.3287,[452]8.3163,[453]8.3074,[454]8.3043,[455]8.3097,[456]8.3081,[457]8.3134,[458]8.3283,[459]8.3248,[460]8.3247,[461]8.3227,[462]8.3213,[463]8.3326,[464]8.3318,[465]8.3329,[466]8.3351,[467]8.3406,[468]8.3455,[469]8.3505,[470]8.3561,[471]8.3456,[472]8.3546,[473]8.3439,[474]8.3426,[475]8.3478,[476]8.3458,[477]8.3364,[478]8.3214,[479]8.3242,[480]8.3318,[481]8.3356,[482]8.3254,[483]8.3338,[484]8.3415,[485]8.3458,[486]8.3452,[487]8.3507,[488]8.3452,[489]8.3343,[490]8.3328,[491]8.3271,[492]8.3271,[493]8.3183,[494]8.3167,[495]8.3110,[496]8.3078,[497]8.3199,[498]8.3263,[499]8.3184,[500]8.3181,[501]8.3182,[502]8.3161,[503]8.3296,[504]8.3328,[505]8.3363,[506]8.3338,[507]8.3307,[508]8.3350,[509]8.3319,[510]8.3307,[511]8.3334,[512]8.3293,[513]8.3317,[514]8.3353,[515]8.3350,[516]8.3378,[517]8.3407,[518]8.3343,[519]8.3345,[520]8.3369,[521]8.3388,[522]8.3295,[523]8.3290,[524]8.3263,[525]8.3301,[526]8.3357,[527]8.3384,[528]8.3378,[529]8.3318,[530]8.3280,[531]8.3313,[532]8.3288,[533]8.3279,[534]8.3279,[535]8.3294,[536]8.3227,[537]8.3285,[538]8.3369,[539]8.3337,[540]8.3459,[541]8.3483,[542]8.3428,[543]8.3452,[544]8.3522,[545]8.3481,[546]8.3405,[547]8.3328,[548]8.3173,[549]8.3178,[550]8.3012,[551]8.2903,[552]8.2809,[553]8.2540,[554]8.2528,[555]8.2562,[556]8.2567,[557]8.2595,[558]8.2589,[559]8.2656,[560]8.2721,[561]8.2812,[562]8.2938,[563]8.3020,[564]8.3000,[565]8.3091,[566]8.3090,[567]8.2960,[568]8.2876,[569]8.2842,[570]8.2838,[571]8.2835,[572]8.2867,[573]8.2874,[574]8.2889,[575]8.2885,[576]8.2944,[577]8.2890,[578]8.2944,[579]8.3000,[580]8.3142,[581]8.3155,[582]8.3276,[583]8.3126,[584]8.3069,
llama_print_timings: load time = 8105.40 ms
llama_print_timings: sample time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_print_timings: prompt eval time = 447048.08 ms / 299008 tokens ( 1.50 ms per token, 668.85 tokens per second)
llama_print_timings: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_print_timings: total time = 458500.62 ms / 299009 tokens
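Note: the throughput figures are internally consistent (hypothetical Python check):

    ms, tokens = 447048.08, 299008
    print(tokens / (ms / 1000))  # ~668.85 tokens per second
    print(ms / tokens)           # ~1.50 ms per token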
Final estimate: PPL over 584 chunks for n_ctx=512 = 8.3069 +/- 0.06459
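Note: the "+/- 0.06459" is the statistical uncertainty of the estimate. One common way to derive it (an assumption about the exact method used here) is the standard error of the mean per-token negative log-likelihood, propagated through the exponential via the delta method:

    import math, statistics

    def ppl_with_error(nlls):
        # nlls: per-token negative log-likelihoods accumulated during the run
        mu  = statistics.fmean(nlls)
        sem = statistics.stdev(nlls) / math.sqrt(len(nlls))  # std error of the mean
        ppl = math.exp(mu)
        return ppl, ppl * sem  # d(exp(mu))/dmu = exp(mu), so err_ppl = ppl * err_mu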