| model=/mnt/data/models/ubergarm/MiniMax-M2.7-GGUF/MiniMax-M2.7-256x4.9B-BF16-00001-of-00010.gguf |
|
|
| numactl -N "$SOCKET" -m "$SOCKET" \ |
| ./build/bin/llama-perplexity \ |
| -m "$model" \ |
| -f wiki.test.raw \ |
| --seed 1337 \ |
| --ctx-size 512 \ |
| -ub 4096 -b 4096 \ |
| --numa numactl \ |
| --threads 96 \ |
| --threads-batch 128 \ |
| --validate-quants \ |
| --no-mmap |
|
|
| SOCKET is set to: 0 |
| main: build = 4408 (08ae48c6) |
| main: built with cc (Ubuntu 13.3.0-6ubuntu2~24.04.1) 13.3.0 for x86_64-linux-gnu |
| main: seed = 1337 |
| CPU: using device CPU - 0 MiB free |
| llama_model_loader: additional 9 GGUFs metadata loaded. |
| llama_model_loader: loaded meta data with 40 key-value pairs and 809 tensors from /mnt/data/models/ubergarm/MiniMax-M2.7-GGUF/MiniMax-M2.7-256x4.9B-BF16-00001-of-00010.gguf (version GGUF V3 (latest)) |
| llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. |
| llama_model_loader: - kv 0: general.architecture str = minimax-m2 |
| llama_model_loader: - kv 1: general.type str = model |
| llama_model_loader: - kv 2: general.sampling.top_k i32 = 40 |
| llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000 |
| llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000 |
| llama_model_loader: - kv 5: general.name str = MiniMax M2.7 |
| llama_model_loader: - kv 6: general.size_label str = 256x4.9B |
| llama_model_loader: - kv 7: general.license str = other |
| llama_model_loader: - kv 8: general.license.name str = modified-mit |
| llama_model_loader: - kv 9: general.license.link str = https: |
| llama_model_loader: - kv 10: general.tags arr[str,1] = ["text-generation"] |
| llama_model_loader: - kv 11: minimax-m2.block_count u32 = 62 |
| llama_model_loader: - kv 12: minimax-m2.context_length u32 = 196608 |
| llama_model_loader: - kv 13: minimax-m2.embedding_length u32 = 3072 |
| llama_model_loader: - kv 14: minimax-m2.feed_forward_length u32 = 1536 |
| llama_model_loader: - kv 15: minimax-m2.attention.head_count u32 = 48 |
| llama_model_loader: - kv 16: minimax-m2.attention.head_count_kv u32 = 8 |
| llama_model_loader: - kv 17: minimax-m2.rope.freq_base f32 = 5000000.000000 |
| llama_model_loader: - kv 18: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001 |
| llama_model_loader: - kv 19: minimax-m2.expert_count u32 = 256 |
| llama_model_loader: - kv 20: minimax-m2.expert_used_count u32 = 8 |
| llama_model_loader: - kv 21: minimax-m2.expert_gating_func u32 = 2 |
| llama_model_loader: - kv 22: minimax-m2.attention.key_length u32 = 128 |
| llama_model_loader: - kv 23: minimax-m2.attention.value_length u32 = 128 |
| llama_model_loader: - kv 24: general.file_type u32 = 32 |
| llama_model_loader: - kv 25: minimax-m2.expert_feed_forward_length u32 = 1536 |
| llama_model_loader: - kv 26: minimax-m2.rope.dimension_count u32 = 64 |
| llama_model_loader: - kv 27: general.quantization_version u32 = 2 |
| llama_model_loader: - kv 28: tokenizer.ggml.model str = gpt2 |
| llama_model_loader: - kv 29: tokenizer.ggml.pre str = minimax-m2 |
| llama_model_loader: - kv 30: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ... |
| llama_model_loader: - kv 31: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... |
| llama_model_loader: - kv 32: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r... |
| llama_model_loader: - kv 33: tokenizer.ggml.bos_token_id u32 = 200034 |
| llama_model_loader: - kv 34: tokenizer.ggml.eos_token_id u32 = 200020 |
| llama_model_loader: - kv 35: tokenizer.ggml.unknown_token_id u32 = 200021 |
| llama_model_loader: - kv 36: tokenizer.chat_template str = {# ----------‑‑‑ special token ... |
| llama_model_loader: - kv 37: split.no u16 = 0 |
| llama_model_loader: - kv 38: split.count u16 = 10 |
| llama_model_loader: - kv 39: split.tensors.count i32 = 809 |
| llama_model_loader: - type f32: 373 tensors |
| llama_model_loader: - type bf16: 436 tensors |
| load: 0 unused tokens |
| load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect |
| load: printing all EOG tokens: |
| load: - 200004 ('<fim_pad>') |
| load: - 200005 ('<reponame>') |
| load: - 200020 ('[e~[') |
| load: special tokens cache size = 54 |
| load: token to piece cache size = 1.3355 MB |
| llm_load_print_meta: format = GGUF V3 (latest) |
| llm_load_print_meta: arch = minimax-m2 |
| llm_load_print_meta: n_ctx_train = 196608 |
| llm_load_print_meta: n_embd = 3072 |
| llm_load_print_meta: n_layer = 62 |
| llm_load_print_meta: n_head = 48 |
| llm_load_print_meta: n_head_kv = 8 |
| llm_load_print_meta: n_rot = 64 |
| llm_load_print_meta: n_swa = 0 |
| llm_load_print_meta: n_swa_pattern = 1 |
| llm_load_print_meta: n_embd_head_k = 128 |
| llm_load_print_meta: n_embd_head_v = 128 |
| llm_load_print_meta: n_gqa = 6 |
| llm_load_print_meta: n_embd_k_gqa = 1024 |
| llm_load_print_meta: n_embd_v_gqa = 1024 |
| llm_load_print_meta: f_norm_eps = 0.0e+00 |
| llm_load_print_meta: f_norm_rms_eps = 1.0e-06 |
| llm_load_print_meta: f_clamp_kqv = 0.0e+00 |
| llm_load_print_meta: f_max_alibi_bias = 0.0e+00 |
| llm_load_print_meta: f_logit_scale = 0.0e+00 |
| llm_load_print_meta: n_ff = 1536 |
| llm_load_print_meta: n_expert = 256 |
| llm_load_print_meta: n_expert_used = 8 |
| llm_load_print_meta: causal attn = 1 |
| llm_load_print_meta: pooling type = 0 |
| llm_load_print_meta: rope type = 2 |
| llm_load_print_meta: rope scaling = linear |
| llm_load_print_meta: freq_base_train = 5000000.0 |
| llm_load_print_meta: freq_scale_train = 1 |
| llm_load_print_meta: n_ctx_orig_yarn = 196608 |
| llm_load_print_meta: rope_finetuned = unknown |
| llm_load_print_meta: ssm_d_conv = 0 |
| llm_load_print_meta: ssm_d_inner = 0 |
| llm_load_print_meta: ssm_d_state = 0 |
| llm_load_print_meta: ssm_dt_rank = 0 |
| llm_load_print_meta: ssm_n_group = 0 |
| llm_load_print_meta: model type = 230B.A10B |
| llm_load_print_meta: model ftype = BF16 |
| llm_load_print_meta: model params = 228.690 B |
| llm_load_print_meta: model size = 426.060 GiB (16.003 BPW) |
| llm_load_print_meta: repeating layers = 423.771 GiB (16.003 BPW, 227.461 B parameters) |
| llm_load_print_meta: general.name = MiniMax M2.7 |
| print_info: vocab type = BPE |
| print_info: n_vocab = 200064 |
| print_info: n_merges = 199744 |
| print_info: BOS token = 200034 ']~!b[' |
| print_info: EOS token = 200020 '[e~[' |
| print_info: UNK token = 200021 ']!d~[' |
| print_info: LF token = 10 'Ċ' |
| print_info: FIM PRE token = 200001 '<fim_prefix>' |
| print_info: FIM SUF token = 200003 '<fim_suffix>' |
| print_info: FIM MID token = 200002 '<fim_middle>' |
| print_info: FIM PAD token = 200004 '<fim_pad>' |
| print_info: FIM REP token = 200005 '<reponame>' |
| print_info: EOG token = 200004 '<fim_pad>' |
| print_info: EOG token = 200005 '<reponame>' |
| print_info: EOG token = 200020 '[e~[' |
| print_info: max token length = 256 |
| ======================================= HAVE_FANCY_SIMD is defined |
| Free memory 0 MiB on device 0 is less the 1024 MiB safety margin |
| ------------------- Layer sizes: |
| Layer 0: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 1: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 2: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 3: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 4: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 5: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 6: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 7: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 8: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 9: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 10: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 11: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 12: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 13: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 14: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 15: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 16: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 17: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 18: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 19: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 20: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 21: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 22: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 23: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 24: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 25: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 26: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 27: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 28: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 29: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 30: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 31: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 32: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 33: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 34: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 35: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 36: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 37: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 38: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 39: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 40: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 41: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 42: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 43: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 44: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 45: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 46: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 47: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 48: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 49: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 50: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 51: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 52: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 53: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 54: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 55: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 56: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 57: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 58: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 59: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 60: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 61: 6999.05, 16.00, 7015.05 864.00 MiB |
| Layer 62: 1172.25, 2262.00, 3434.25 MiB (output layer) |
| -------------------------------------------------------------------------- |
| Total : 433941.21, 3254.00, 437195.21 MiB |
| Free memory 0 MiB on device 0 is less the required compute buffer size 864 MiB |
| Memory required for model tensors + cache: 438367 MiB |
| Memory available on all devices - compute: 0 MiB |
| llm_load_tensors: ggml ctx size = 0.35 MiB |
| llm_load_tensors: offloading 0 repeating layers to GPU |
| llm_load_tensors: offloaded 0/63 layers to GPU |
| llm_load_tensors: CPU buffer size = 436285.72 MiB |
| .................................................................................................... |
| llama_init_from_model: n_ctx = 4096 |
| llama_init_from_model: n_batch = 4096 |
| llama_init_from_model: n_ubatch = 4096 |
| llama_init_from_model: flash_attn = 1 |
| llama_init_from_model: attn_max_b = 0 |
| llama_init_from_model: fused_moe = 1 |
| llama_init_from_model: grouped er = 0 |
| llama_init_from_model: fused_up_gate = 1 |
| llama_init_from_model: fused_mmad = 1 |
| llama_init_from_model: rope_cache = 0 |
| llama_init_from_model: graph_reuse = 1 |
| llama_init_from_model: k_cache_hadam = 0 |
| llama_init_from_model: v_cache_hadam = 0 |
| llama_init_from_model: split_mode_graph_scheduling = 0 |
| llama_init_from_model: reduce_type = f16 |
| llama_init_from_model: sched_async = 0 |
| llama_init_from_model: ser = -1, 0 |
| llama_init_from_model: freq_base = 5000000.0 |
| llama_init_from_model: freq_scale = 1 |
| llama_kv_cache_init: CPU KV buffer size = 992.00 MiB |
| llama_init_from_model: KV self size = 992.00 MiB, K (f16): 496.00 MiB, V (f16): 496.00 MiB |
| llama_init_from_model: CPU output buffer size = 6.11 MiB |
| llama_init_from_model: CPU compute buffer size = 3222.00 MiB |
| llama_init_from_model: graph nodes = 2361 |
| llama_init_from_model: graph splits = 1 |
| llama_init_from_model: enabling only_active_experts scheduling |
| |
| system_info: n_threads = 96 (n_threads_batch = 128) / 512 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | FMA = 1 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 | |
| perplexity: tokenizing the input .. |
| perplexity: tokenization took 610.982 ms |
| perplexity: calculating perplexity over 552 chunks, n_ctx=512, batch_size=4096, n_seq=8 |
| perplexity: 9.46 seconds per pass - ETA 10.87 minutes |
| [1]3.6656,[2]4.5631,[3]4.1041,[4]4.6958,[5]5.0340,[6]5.5438,[7]5.9163,[8]6.8530,[9]7.2490,[10]7.4509,[11]7.5664,[12]7.9755,[13]8.0297,[14]7.8854,[15]8.0616,[16]7.6778,[17]7.8172,[18]7.7946,[19]7.6677,[20]7.4355,[21]7.3612,[22]7.1414,[23]6.9067,[24]6.7766,[25]6.4477,[26]6.2765,[27]6.4009,[28]6.3920,[29]6.4380,[30]6.4461,[31]6.3949,[32]6.4372,[33]6.5454,[34]6.7099,[35]6.8250,[36]6.7806,[37]6.8260,[38]6.8941,[39]6.8900,[40]6.9985,[41]7.0597,[42]7.0173,[43]7.0260,[44]7.1724,[45]7.2612,[46]7.2440,[47]7.2149,[48]7.2205,[49]7.2506,[50]7.3211,[51]7.3520,[52]7.3695,[53]7.4219,[54]7.4346,[55]7.4775,[56]7.4507,[57]7.4740,[58]7.4490,[59]7.4850,[60]7.5498,[61]7.6384,[62]7.7100,[63]7.7606,[64]7.7537,[65]7.7692,[66]7.7704,[67]7.7934,[68]7.8412,[69]7.8594,[70]7.8703,[71]7.8129,[72]7.8159,[73]7.8585,[74]7.8558,[75]7.7418,[76]7.6841,[77]7.6934,[78]7.7092,[79]7.7171,[80]7.7191,[81]7.7634,[82]7.7546,[83]7.7448,[84]7.7559,[85]7.7534,[86]7.8339,[87]7.8359,[88]7.8626,[89]7.8747,[90]7.8740,[91]7.8710,[92]7.8374,[93]7.8478,[94]7.8432,[95]7.8956,[96]7.9086,[97]7.9279,[98]7.9300,[99]7.9195,[100]7.9128,[101]7.9824,[102]8.0289,[103]8.0885,[104]8.1176,[105]8.1968,[106]8.2238,[107]8.1898,[108]8.2690,[109]8.3158,[110]8.2866,[111]8.2418,[112]8.2384,[113]8.1935,[114]8.1922,[115]8.1455,[116]8.1232,[117]8.0821,[118]8.0651,[119]8.0140,[120]7.9778,[121]7.9449,[122]7.8790,[123]7.8340,[124]7.7965,[125]7.7510,[126]7.7388,[127]7.7407,[128]7.7545,[129]7.7457,[130]7.7381,[131]7.7453,[132]7.7583,[133]7.7659,[134]7.7828,[135]7.7795,[136]7.7707,[137]7.7718,[138]7.7248,[139]7.6961,[140]7.6593,[141]7.6323,[142]7.5865,[143]7.5469,[144]7.5187,[145]7.5080,[146]7.4787,[147]7.4531,[148]7.4052,[149]7.3720,[150]7.3499,[151]7.3296,[152]7.3032,[153]7.2954,[154]7.2732,[155]7.2714,[156]7.2560,[157]7.2571,[158]7.2651,[159]7.2700,[160]7.2895,[161]7.3086,[162]7.3532,[163]7.3901,[164]7.4233,[165]7.4771,[166]7.4971,[167]7.5315,[168]7.5600,[169]7.5738,[170]7.5733,[171]7.5736,[172]7.5996,[173]7.5717,[174]7.5806,[175]7.5855,[176]7.5932,[177]7.5970,[178]7.5951,[179]7.6304,[180]7.6630,[181]7.6861,[182]7.6910,[183]7.7167,[184]7.7574,[185]7.7880,[186]7.8066,[187]7.8162,[188]7.8144,[189]7.7919,[190]7.7909,[191]7.7782,[192]7.8051,[193]7.8274,[194]7.8509,[195]7.8479,[196]7.8603,[197]7.8372,[198]7.8671,[199]7.8418,[200]7.8287,[201]7.8141,[202]7.7981,[203]7.7848,[204]7.7832,[205]7.7930,[206]7.8023,[207]7.7837,[208]7.7542,[209]7.7405,[210]7.7391,[211]7.7234,[212]7.7204,[213]7.7098,[214]7.6778,[215]7.6529,[216]7.6413,[217]7.6188,[218]7.6045,[219]7.5990,[220]7.5952,[221]7.5911,[222]7.5647,[223]7.5557,[224]7.5500,[225]7.5435,[226]7.5416,[227]7.5478,[228]7.5551,[229]7.5528,[230]7.5685,[231]7.5729,[232]7.6018,[233]7.6216,[234]7.6339,[235]7.6444,[236]7.6613,[237]7.6791,[238]7.6814,[239]7.6993,[240]7.7307,[241]7.7472,[242]7.7498,[243]7.7599,[244]7.7536,[245]7.7211,[246]7.7042,[247]7.6853,[248]7.6771,[249]7.6767,[250]7.6847,[251]7.6839,[252]7.6740,[253]7.6611,[254]7.6636,[255]7.6494,[256]7.6359,[257]7.6252,[258]7.6126,[259]7.6127,[260]7.6112,[261]7.5924,[262]7.5901,[263]7.5764,[264]7.5703,[265]7.5635,[266]7.5413,[267]7.5431,[268]7.5119,[269]7.5022,[270]7.4985,[271]7.4915,[272]7.4801,[273]7.4802,[274]7.4959,[275]7.5045,[276]7.5137,[277]7.5219,[278]7.5279,[279]7.5386,[280]7.5489,[281]7.5641,[282]7.5562,[283]7.5491,[284]7.5521,[285]7.5470,[286]7.5421,[287]7.5376,[288]7.5509,[289]7.5615,[290]7.5601,[291]7.5600,[292]7.5600,[293]7.5600,[294]7.5642,[295]7.5703,[296]7.5716,[297]7.5745,[298]7.5763,[299]7.5789,[300]7.5901,[301]7.5973,[302]7.5894,[303]7.5843,[304]7.5729,[305]7.5810,[306]7.5904,[307]7.6015,[308]7.6221,[309]7.6225,[310]7.6362,[311]7.6292,[312]7.6322,[313]7.6230,[314]7.6165,[315]7.6209,[316]7.6119,[317]7.6110,[318]7.6191,[319]7.6134,[320]7.6291,[321]7.6255,[322]7.6268,[323]7.6223,[324]7.6176,[325]7.6149,[326]7.6256,[327]7.6305,[328]7.6273,[329]7.6250,[330]7.6151,[331]7.6053,[332]7.5948,[333]7.5961,[334]7.5910,[335]7.5808,[336]7.5913,[337]7.5987,[338]7.6104,[339]7.6035,[340]7.6011,[341]7.5970,[342]7.6041,[343]7.5974,[344]7.5911,[345]7.6001,[346]7.6168,[347]7.6384,[348]7.6624,[349]7.6719,[350]7.6895,[351]7.7089,[352]7.7214,[353]7.7347,[354]7.7402,[355]7.7521,[356]7.7630,[357]7.7596,[358]7.7733,[359]7.7882,[360]7.7959,[361]7.8080,[362]7.8177,[363]7.8311,[364]7.8410,[365]7.8624,[366]7.8731,[367]7.8729,[368]7.8774,[369]7.8842,[370]7.9083,[371]7.9246,[372]7.9287,[373]7.9194,[374]7.9171,[375]7.9213,[376]7.9309,[377]7.9345,[378]7.9434,[379]7.9504,[380]7.9578,[381]7.9718,[382]7.9681,[383]7.9411,[384]7.9353,[385]7.9267,[386]7.9317,[387]7.9356,[388]7.9338,[389]7.9387,[390]7.9460,[391]7.9365,[392]7.9248,[393]7.9228,[394]7.9101,[395]7.9025,[396]7.9018,[397]7.9011,[398]7.8880,[399]7.8775,[400]7.8688,[401]7.8612,[402]7.8511,[403]7.8405,[404]7.8306,[405]7.8311,[406]7.8418,[407]7.8508,[408]7.8413,[409]7.8336,[410]7.8379,[411]7.8256,[412]7.8267,[413]7.8268,[414]7.8239,[415]7.8266,[416]7.8194,[417]7.8139,[418]7.8066,[419]7.8058,[420]7.8012,[421]7.7983,[422]7.7938,[423]7.7919,[424]7.7852,[425]7.7769,[426]7.7630,[427]7.7596,[428]7.7505,[429]7.7392,[430]7.7274,[431]7.7171,[432]7.7225,[433]7.7362,[434]7.7447,[435]7.7575,[436]7.7544,[437]7.7531,[438]7.7527,[439]7.7596,[440]7.7589,[441]7.7615,[442]7.7646,[443]7.7763,[444]7.7848,[445]7.7860,[446]7.7909,[447]7.7838,[448]7.7855,[449]7.7782,[450]7.7856,[451]7.7936,[452]7.7931,[453]7.7908,[454]7.7827,[455]7.7849,[456]7.7958,[457]7.7969,[458]7.8021,[459]7.8115,[460]7.8158,[461]7.8152,[462]7.8202,[463]7.8206,[464]7.8237,[465]7.8228,[466]7.8170,[467]7.8193,[468]7.8168,[469]7.8129,[470]7.8143,[471]7.8194,[472]7.8303,[473]7.8229,[474]7.8264,[475]7.8234,[476]7.8283,[477]7.8393,[478]7.8438,[479]7.8503,[480]7.8590,[481]7.8611,[482]7.8603,[483]7.8674,[484]7.8730,[485]7.8667,[486]7.8620,[487]7.8584,[488]7.8555,[489]7.8530,[490]7.8443,[491]7.8481,[492]7.8480,[493]7.8589,[494]7.8463,[495]7.8454,[496]7.8446,[497]7.8486,[498]7.8556,[499]7.8593,[500]7.8531,[501]7.8458,[502]7.8390,[503]7.8475,[504]7.8478,[505]7.8499,[506]7.8550,[507]7.8512,[508]7.8536,[509]7.8647,[510]7.8598,[511]7.8729,[512]7.8756,[513]7.8680,[514]7.8727,[515]7.8777,[516]7.8826,[517]7.8779,[518]7.8622,[519]7.8625,[520]7.8586,[521]7.8514,[522]7.8456,[523]7.8211,[524]7.8176,[525]7.8179,[526]7.8212,[527]7.8283,[528]7.8285,[529]7.8382,[530]7.8471,[531]7.8564,[532]7.8649,[533]7.8717,[534]7.8862,[535]7.8828,[536]7.8837,[537]7.8719,[538]7.8671,[539]7.8622,[540]7.8587,[541]7.8622,[542]7.8632,[543]7.8620,[544]7.8573,[545]7.8589,[546]7.8547,[547]7.8530,[548]7.8598,[549]7.8650,[550]7.8771,[551]7.8755,[552]7.8743, |
| llama_print_timings: load time = 82834.14 ms |
| llama_print_timings: sample time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) |
| llama_print_timings: prompt eval time = 525259.74 ms / 282624 tokens ( 1.86 ms per token, 538.07 tokens per second) |
| llama_print_timings: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) |
| llama_print_timings: total time = 536052.62 ms / 282625 tokens |
| |
| Final estimate: PPL over 552 chunks for n_ctx=512 = 7.8743 +/- 0.05993 |
| |