MiniMax-M2.7-GGUF/logs/perplexity-MiniMax-M2.7-BF16.log
model=/mnt/data/models/ubergarm/MiniMax-M2.7-GGUF/MiniMax-M2.7-256x4.9B-BF16-00001-of-00010.gguf
numactl -N "$SOCKET" -m "$SOCKET" \
./build/bin/llama-perplexity \
-m "$model" \
-f wiki.test.raw \
--seed 1337 \
--ctx-size 512 \
-ub 4096 -b 4096 \
--numa numactl \
--threads 96 \
--threads-batch 128 \
--validate-quants \
--no-mmap
SOCKET is set to: 0
main: build = 4408 (08ae48c6)
main: built with cc (Ubuntu 13.3.0-6ubuntu2~24.04.1) 13.3.0 for x86_64-linux-gnu
main: seed = 1337
CPU: using device CPU - 0 MiB free
llama_model_loader: additional 9 GGUFs metadata loaded.
llama_model_loader: loaded meta data with 40 key-value pairs and 809 tensors from /mnt/data/models/ubergarm/MiniMax-M2.7-GGUF/MiniMax-M2.7-256x4.9B-BF16-00001-of-00010.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = minimax-m2
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
llama_model_loader: - kv 5: general.name str = MiniMax M2.7
llama_model_loader: - kv 6: general.size_label str = 256x4.9B
llama_model_loader: - kv 7: general.license str = other
llama_model_loader: - kv 8: general.license.name str = modified-mit
llama_model_loader: - kv 9: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
llama_model_loader: - kv 10: general.tags arr[str,1] = ["text-generation"]
llama_model_loader: - kv 11: minimax-m2.block_count u32 = 62
llama_model_loader: - kv 12: minimax-m2.context_length u32 = 196608
llama_model_loader: - kv 13: minimax-m2.embedding_length u32 = 3072
llama_model_loader: - kv 14: minimax-m2.feed_forward_length u32 = 1536
llama_model_loader: - kv 15: minimax-m2.attention.head_count u32 = 48
llama_model_loader: - kv 16: minimax-m2.attention.head_count_kv u32 = 8
llama_model_loader: - kv 17: minimax-m2.rope.freq_base f32 = 5000000.000000
llama_model_loader: - kv 18: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 19: minimax-m2.expert_count u32 = 256
llama_model_loader: - kv 20: minimax-m2.expert_used_count u32 = 8
llama_model_loader: - kv 21: minimax-m2.expert_gating_func u32 = 2
llama_model_loader: - kv 22: minimax-m2.attention.key_length u32 = 128
llama_model_loader: - kv 23: minimax-m2.attention.value_length u32 = 128
llama_model_loader: - kv 24: general.file_type u32 = 32
llama_model_loader: - kv 25: minimax-m2.expert_feed_forward_length u32 = 1536
llama_model_loader: - kv 26: minimax-m2.rope.dimension_count u32 = 64
llama_model_loader: - kv 27: general.quantization_version u32 = 2
llama_model_loader: - kv 28: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 29: tokenizer.ggml.pre str = minimax-m2
llama_model_loader: - kv 30: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
llama_model_loader: - kv 31: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 32: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
llama_model_loader: - kv 33: tokenizer.ggml.bos_token_id u32 = 200034
llama_model_loader: - kv 34: tokenizer.ggml.eos_token_id u32 = 200020
llama_model_loader: - kv 35: tokenizer.ggml.unknown_token_id u32 = 200021
llama_model_loader: - kv 36: tokenizer.chat_template str = {# ----------‑‑‑ special token ...
llama_model_loader: - kv 37: split.no u16 = 0
llama_model_loader: - kv 38: split.count u16 = 10
llama_model_loader: - kv 39: split.tensors.count i32 = 809
llama_model_loader: - type f32: 373 tensors
llama_model_loader: - type bf16: 436 tensors
load: 0 unused tokens
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
load: printing all EOG tokens:
load: - 200004 ('<fim_pad>')
load: - 200005 ('<reponame>')
load: - 200020 ('[e~[')
load: special tokens cache size = 54
load: token to piece cache size = 1.3355 MB
llm_load_print_meta: format = GGUF V3 (latest)
llm_load_print_meta: arch = minimax-m2
llm_load_print_meta: n_ctx_train = 196608
llm_load_print_meta: n_embd = 3072
llm_load_print_meta: n_layer = 62
llm_load_print_meta: n_head = 48
llm_load_print_meta: n_head_kv = 8
llm_load_print_meta: n_rot = 64
llm_load_print_meta: n_swa = 0
llm_load_print_meta: n_swa_pattern = 1
llm_load_print_meta: n_embd_head_k = 128
llm_load_print_meta: n_embd_head_v = 128
llm_load_print_meta: n_gqa = 6
llm_load_print_meta: n_embd_k_gqa = 1024
llm_load_print_meta: n_embd_v_gqa = 1024
llm_load_print_meta: f_norm_eps = 0.0e+00
llm_load_print_meta: f_norm_rms_eps = 1.0e-06
llm_load_print_meta: f_clamp_kqv = 0.0e+00
llm_load_print_meta: f_max_alibi_bias = 0.0e+00
llm_load_print_meta: f_logit_scale = 0.0e+00
llm_load_print_meta: n_ff = 1536
llm_load_print_meta: n_expert = 256
llm_load_print_meta: n_expert_used = 8
llm_load_print_meta: causal attn = 1
llm_load_print_meta: pooling type = 0
llm_load_print_meta: rope type = 2
llm_load_print_meta: rope scaling = linear
llm_load_print_meta: freq_base_train = 5000000.0
llm_load_print_meta: freq_scale_train = 1
llm_load_print_meta: n_ctx_orig_yarn = 196608
llm_load_print_meta: rope_finetuned = unknown
llm_load_print_meta: ssm_d_conv = 0
llm_load_print_meta: ssm_d_inner = 0
llm_load_print_meta: ssm_d_state = 0
llm_load_print_meta: ssm_dt_rank = 0
llm_load_print_meta: ssm_n_group = 0
llm_load_print_meta: model type = 230B.A10B
llm_load_print_meta: model ftype = BF16
llm_load_print_meta: model params = 228.690 B
llm_load_print_meta: model size = 426.060 GiB (16.003 BPW)
llm_load_print_meta: repeating layers = 423.771 GiB (16.003 BPW, 227.461 B parameters)
llm_load_print_meta: general.name = MiniMax M2.7
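Note (not part of the original log): the 16.003 BPW figure above can be roughly cross-checked from the reported size and parameter count; a quick sketch, assuming GiB = 1024^3 bytes:

# Rough cross-check of the BPW value printed by llm_load_print_meta.
size_gib = 426.060                  # reported model size
params_b = 228.690                  # reported parameter count, in billions
bpw = size_gib * 1024**3 * 8 / (params_b * 1e9)
print(f"{bpw:.3f} BPW")             # ~16.003

The small excess over 16 BPW is consistent with the 373 f32 tensors listed above sitting alongside the 436 bf16 weight tensors.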
print_info: vocab type = BPE
print_info: n_vocab = 200064
print_info: n_merges = 199744
print_info: BOS token = 200034 ']~!b['
print_info: EOS token = 200020 '[e~['
print_info: UNK token = 200021 ']!d~['
print_info: LF token = 10 'Ċ'
print_info: FIM PRE token = 200001 '<fim_prefix>'
print_info: FIM SUF token = 200003 '<fim_suffix>'
print_info: FIM MID token = 200002 '<fim_middle>'
print_info: FIM PAD token = 200004 '<fim_pad>'
print_info: FIM REP token = 200005 '<reponame>'
print_info: EOG token = 200004 '<fim_pad>'
print_info: EOG token = 200005 '<reponame>'
print_info: EOG token = 200020 '[e~['
print_info: max token length = 256
======================================= HAVE_FANCY_SIMD is defined
Free memory 0 MiB on device 0 is less the 1024 MiB safety margin
------------------- Layer sizes:
Layer 0: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 1: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 2: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 3: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 4: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 5: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 6: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 7: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 8: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 9: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 10: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 11: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 12: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 13: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 14: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 15: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 16: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 17: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 18: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 19: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 20: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 21: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 22: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 23: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 24: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 25: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 26: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 27: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 28: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 29: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 30: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 31: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 32: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 33: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 34: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 35: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 36: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 37: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 38: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 39: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 40: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 41: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 42: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 43: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 44: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 45: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 46: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 47: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 48: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 49: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 50: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 51: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 52: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 53: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 54: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 55: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 56: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 57: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 58: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 59: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 60: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 61: 6999.05, 16.00, 7015.05 864.00 MiB
Layer 62: 1172.25, 2262.00, 3434.25 MiB (output layer)
--------------------------------------------------------------------------
Total : 433941.21, 3254.00, 437195.21 MiB
Free memory 0 MiB on device 0 is less the required compute buffer size 864 MiB
Memory required for model tensors + cache: 438367 MiB
Memory available on all devices - compute: 0 MiB
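Note (not part of the original log): the ~6999 MiB per repeating layer is dominated by the 256 BF16 expert tensors; an approximate breakdown using the shapes from the metadata above, ignoring router and norm tensors:

# Approximate per-layer weight size at BF16 (2 bytes/param); small tensors ignored.
n_embd, n_ff_exp, n_expert = 3072, 1536, 256
n_head, n_head_kv, head_dim = 48, 8, 128
experts = 3 * n_ff_exp * n_embd * n_expert                           # ffn gate/up/down expert projections
attn = n_embd * (2 * n_head * head_dim + 2 * n_head_kv * head_dim)   # wq, wo, wk, wv
print((experts + attn) * 2 / 2**20)   # ~6996 MiB; the remaining few MiB of 6999.05 are router/norm tensors

Sixty-two such layers account for the 433941.21 MiB weight subtotal in the table above.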
llm_load_tensors: ggml ctx size = 0.35 MiB
llm_load_tensors: offloading 0 repeating layers to GPU
llm_load_tensors: offloaded 0/63 layers to GPU
llm_load_tensors: CPU buffer size = 436285.72 MiB
....................................................................................................
llama_init_from_model: n_ctx = 4096
llama_init_from_model: n_batch = 4096
llama_init_from_model: n_ubatch = 4096
llama_init_from_model: flash_attn = 1
llama_init_from_model: attn_max_b = 0
llama_init_from_model: fused_moe = 1
llama_init_from_model: grouped er = 0
llama_init_from_model: fused_up_gate = 1
llama_init_from_model: fused_mmad = 1
llama_init_from_model: rope_cache = 0
llama_init_from_model: graph_reuse = 1
llama_init_from_model: k_cache_hadam = 0
llama_init_from_model: v_cache_hadam = 0
llama_init_from_model: split_mode_graph_scheduling = 0
llama_init_from_model: reduce_type = f16
llama_init_from_model: sched_async = 0
llama_init_from_model: ser = -1, 0
llama_init_from_model: freq_base = 5000000.0
llama_init_from_model: freq_scale = 1
llama_kv_cache_init: CPU KV buffer size = 992.00 MiB
llama_init_from_model: KV self size = 992.00 MiB, K (f16): 496.00 MiB, V (f16): 496.00 MiB
llama_init_from_model: CPU output buffer size = 6.11 MiB
llama_init_from_model: CPU compute buffer size = 3222.00 MiB
llama_init_from_model: graph nodes = 2361
llama_init_from_model: graph splits = 1
llama_init_from_model: enabling only_active_experts scheduling
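Note (not part of the original log): the 992 MiB KV cache size follows from the shapes printed earlier, assuming f16 (2 bytes) per element:

# KV cache = n_layer * n_ctx * (K + V) * n_embd_kv_gqa * bytes per element.
n_layer, n_ctx, n_embd_kv_gqa = 62, 4096, 1024    # 8 KV heads * 128-dim heads
kv_mib = n_layer * n_ctx * 2 * n_embd_kv_gqa * 2 / 2**20
print(kv_mib)                                     # 992.0, split evenly: K 496 MiB + V 496 MiB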
system_info: n_threads = 96 (n_threads_batch = 128) / 512 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | FMA = 1 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 |
perplexity: tokenizing the input ..
perplexity: tokenization took 610.982 ms
perplexity: calculating perplexity over 552 chunks, n_ctx=512, batch_size=4096, n_seq=8
perplexity: 9.46 seconds per pass - ETA 10.87 minutes
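Note (not part of the original log): the chunk count, n_seq, and ETA follow from the tokenized corpus size and the batch settings; a quick sketch:

# wiki.test.raw tokenizes to 282624 tokens (see the timings at the end of this log).
tokens, n_ctx, n_batch = 282624, 512, 4096
chunks = tokens // n_ctx            # 552 chunks of 512 tokens
n_seq = n_batch // n_ctx            # 8 chunks evaluated per pass
print(chunks / n_seq * 9.46 / 60)   # ~10.9 minutes, in line with the ETA above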
[1]3.6656,[2]4.5631,[3]4.1041,[4]4.6958,[5]5.0340,[6]5.5438,[7]5.9163,[8]6.8530,[9]7.2490,[10]7.4509,[11]7.5664,[12]7.9755,[13]8.0297,[14]7.8854,[15]8.0616,[16]7.6778,[17]7.8172,[18]7.7946,[19]7.6677,[20]7.4355,[21]7.3612,[22]7.1414,[23]6.9067,[24]6.7766,[25]6.4477,[26]6.2765,[27]6.4009,[28]6.3920,[29]6.4380,[30]6.4461,[31]6.3949,[32]6.4372,[33]6.5454,[34]6.7099,[35]6.8250,[36]6.7806,[37]6.8260,[38]6.8941,[39]6.8900,[40]6.9985,[41]7.0597,[42]7.0173,[43]7.0260,[44]7.1724,[45]7.2612,[46]7.2440,[47]7.2149,[48]7.2205,[49]7.2506,[50]7.3211,[51]7.3520,[52]7.3695,[53]7.4219,[54]7.4346,[55]7.4775,[56]7.4507,[57]7.4740,[58]7.4490,[59]7.4850,[60]7.5498,[61]7.6384,[62]7.7100,[63]7.7606,[64]7.7537,[65]7.7692,[66]7.7704,[67]7.7934,[68]7.8412,[69]7.8594,[70]7.8703,[71]7.8129,[72]7.8159,[73]7.8585,[74]7.8558,[75]7.7418,[76]7.6841,[77]7.6934,[78]7.7092,[79]7.7171,[80]7.7191,[81]7.7634,[82]7.7546,[83]7.7448,[84]7.7559,[85]7.7534,[86]7.8339,[87]7.8359,[88]7.8626,[89]7.8747,[90]7.8740,[91]7.8710,[92]7.8374,[93]7.8478,[94]7.8432,[95]7.8956,[96]7.9086,[97]7.9279,[98]7.9300,[99]7.9195,[100]7.9128,[101]7.9824,[102]8.0289,[103]8.0885,[104]8.1176,[105]8.1968,[106]8.2238,[107]8.1898,[108]8.2690,[109]8.3158,[110]8.2866,[111]8.2418,[112]8.2384,[113]8.1935,[114]8.1922,[115]8.1455,[116]8.1232,[117]8.0821,[118]8.0651,[119]8.0140,[120]7.9778,[121]7.9449,[122]7.8790,[123]7.8340,[124]7.7965,[125]7.7510,[126]7.7388,[127]7.7407,[128]7.7545,[129]7.7457,[130]7.7381,[131]7.7453,[132]7.7583,[133]7.7659,[134]7.7828,[135]7.7795,[136]7.7707,[137]7.7718,[138]7.7248,[139]7.6961,[140]7.6593,[141]7.6323,[142]7.5865,[143]7.5469,[144]7.5187,[145]7.5080,[146]7.4787,[147]7.4531,[148]7.4052,[149]7.3720,[150]7.3499,[151]7.3296,[152]7.3032,[153]7.2954,[154]7.2732,[155]7.2714,[156]7.2560,[157]7.2571,[158]7.2651,[159]7.2700,[160]7.2895,[161]7.3086,[162]7.3532,[163]7.3901,[164]7.4233,[165]7.4771,[166]7.4971,[167]7.5315,[168]7.5600,[169]7.5738,[170]7.5733,[171]7.5736,[172]7.5996,[173]7.5717,[174]7.5806,[175]7.5855,[176]7.5932,[177]7.5970,[178]7.5951,[179]7.6304,[180]7.6630,[181]7.6861,[182]7.6910,[183]7.7167,[184]7.7574,[185]7.7880,[186]7.8066,[187]7.8162,[188]7.8144,[189]7.7919,[190]7.7909,[191]7.7782,[192]7.8051,[193]7.8274,[194]7.8509,[195]7.8479,[196]7.8603,[197]7.8372,[198]7.8671,[199]7.8418,[200]7.8287,[201]7.8141,[202]7.7981,[203]7.7848,[204]7.7832,[205]7.7930,[206]7.8023,[207]7.7837,[208]7.7542,[209]7.7405,[210]7.7391,[211]7.7234,[212]7.7204,[213]7.7098,[214]7.6778,[215]7.6529,[216]7.6413,[217]7.6188,[218]7.6045,[219]7.5990,[220]7.5952,[221]7.5911,[222]7.5647,[223]7.5557,[224]7.5500,[225]7.5435,[226]7.5416,[227]7.5478,[228]7.5551,[229]7.5528,[230]7.5685,[231]7.5729,[232]7.6018,[233]7.6216,[234]7.6339,[235]7.6444,[236]7.6613,[237]7.6791,[238]7.6814,[239]7.6993,[240]7.7307,[241]7.7472,[242]7.7498,[243]7.7599,[244]7.7536,[245]7.7211,[246]7.7042,[247]7.6853,[248]7.6771,[249]7.6767,[250]7.6847,[251]7.6839,[252]7.6740,[253]7.6611,[254]7.6636,[255]7.6494,[256]7.6359,[257]7.6252,[258]7.6126,[259]7.6127,[260]7.6112,[261]7.5924,[262]7.5901,[263]7.5764,[264]7.5703,[265]7.5635,[266]7.5413,[267]7.5431,[268]7.5119,[269]7.5022,[270]7.4985,[271]7.4915,[272]7.4801,[273]7.4802,[274]7.4959,[275]7.5045,[276]7.5137,[277]7.5219,[278]7.5279,[279]7.5386,[280]7.5489,[281]7.5641,[282]7.5562,[283]7.5491,[284]7.5521,[285]7.5470,[286]7.5421,[287]7.5376,[288]7.5509,[289]7.5615,[290]7.5601,[291]7.5600,[292]7.5600,[293]7.5600,[294]7.5642,[295]7.5703,[296]7.5716,[297]7.5745,[298]7.5763,[299]7.5789,[300]7.5901,[301]7.5973,[302]7.5894,[303]7.5843,[304]7.5729,[305]7.5810,[30
6]7.5904,[307]7.6015,[308]7.6221,[309]7.6225,[310]7.6362,[311]7.6292,[312]7.6322,[313]7.6230,[314]7.6165,[315]7.6209,[316]7.6119,[317]7.6110,[318]7.6191,[319]7.6134,[320]7.6291,[321]7.6255,[322]7.6268,[323]7.6223,[324]7.6176,[325]7.6149,[326]7.6256,[327]7.6305,[328]7.6273,[329]7.6250,[330]7.6151,[331]7.6053,[332]7.5948,[333]7.5961,[334]7.5910,[335]7.5808,[336]7.5913,[337]7.5987,[338]7.6104,[339]7.6035,[340]7.6011,[341]7.5970,[342]7.6041,[343]7.5974,[344]7.5911,[345]7.6001,[346]7.6168,[347]7.6384,[348]7.6624,[349]7.6719,[350]7.6895,[351]7.7089,[352]7.7214,[353]7.7347,[354]7.7402,[355]7.7521,[356]7.7630,[357]7.7596,[358]7.7733,[359]7.7882,[360]7.7959,[361]7.8080,[362]7.8177,[363]7.8311,[364]7.8410,[365]7.8624,[366]7.8731,[367]7.8729,[368]7.8774,[369]7.8842,[370]7.9083,[371]7.9246,[372]7.9287,[373]7.9194,[374]7.9171,[375]7.9213,[376]7.9309,[377]7.9345,[378]7.9434,[379]7.9504,[380]7.9578,[381]7.9718,[382]7.9681,[383]7.9411,[384]7.9353,[385]7.9267,[386]7.9317,[387]7.9356,[388]7.9338,[389]7.9387,[390]7.9460,[391]7.9365,[392]7.9248,[393]7.9228,[394]7.9101,[395]7.9025,[396]7.9018,[397]7.9011,[398]7.8880,[399]7.8775,[400]7.8688,[401]7.8612,[402]7.8511,[403]7.8405,[404]7.8306,[405]7.8311,[406]7.8418,[407]7.8508,[408]7.8413,[409]7.8336,[410]7.8379,[411]7.8256,[412]7.8267,[413]7.8268,[414]7.8239,[415]7.8266,[416]7.8194,[417]7.8139,[418]7.8066,[419]7.8058,[420]7.8012,[421]7.7983,[422]7.7938,[423]7.7919,[424]7.7852,[425]7.7769,[426]7.7630,[427]7.7596,[428]7.7505,[429]7.7392,[430]7.7274,[431]7.7171,[432]7.7225,[433]7.7362,[434]7.7447,[435]7.7575,[436]7.7544,[437]7.7531,[438]7.7527,[439]7.7596,[440]7.7589,[441]7.7615,[442]7.7646,[443]7.7763,[444]7.7848,[445]7.7860,[446]7.7909,[447]7.7838,[448]7.7855,[449]7.7782,[450]7.7856,[451]7.7936,[452]7.7931,[453]7.7908,[454]7.7827,[455]7.7849,[456]7.7958,[457]7.7969,[458]7.8021,[459]7.8115,[460]7.8158,[461]7.8152,[462]7.8202,[463]7.8206,[464]7.8237,[465]7.8228,[466]7.8170,[467]7.8193,[468]7.8168,[469]7.8129,[470]7.8143,[471]7.8194,[472]7.8303,[473]7.8229,[474]7.8264,[475]7.8234,[476]7.8283,[477]7.8393,[478]7.8438,[479]7.8503,[480]7.8590,[481]7.8611,[482]7.8603,[483]7.8674,[484]7.8730,[485]7.8667,[486]7.8620,[487]7.8584,[488]7.8555,[489]7.8530,[490]7.8443,[491]7.8481,[492]7.8480,[493]7.8589,[494]7.8463,[495]7.8454,[496]7.8446,[497]7.8486,[498]7.8556,[499]7.8593,[500]7.8531,[501]7.8458,[502]7.8390,[503]7.8475,[504]7.8478,[505]7.8499,[506]7.8550,[507]7.8512,[508]7.8536,[509]7.8647,[510]7.8598,[511]7.8729,[512]7.8756,[513]7.8680,[514]7.8727,[515]7.8777,[516]7.8826,[517]7.8779,[518]7.8622,[519]7.8625,[520]7.8586,[521]7.8514,[522]7.8456,[523]7.8211,[524]7.8176,[525]7.8179,[526]7.8212,[527]7.8283,[528]7.8285,[529]7.8382,[530]7.8471,[531]7.8564,[532]7.8649,[533]7.8717,[534]7.8862,[535]7.8828,[536]7.8837,[537]7.8719,[538]7.8671,[539]7.8622,[540]7.8587,[541]7.8622,[542]7.8632,[543]7.8620,[544]7.8573,[545]7.8589,[546]7.8547,[547]7.8530,[548]7.8598,[549]7.8650,[550]7.8771,[551]7.8755,[552]7.8743,
llama_print_timings: load time = 82834.14 ms
llama_print_timings: sample time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_print_timings: prompt eval time = 525259.74 ms / 282624 tokens ( 1.86 ms per token, 538.07 tokens per second)
llama_print_timings: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_print_timings: total time = 536052.62 ms / 282625 tokens
Final estimate: PPL over 552 chunks for n_ctx=512 = 7.8743 +/- 0.05993
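Note (not part of the original log): each bracketed value above is the cumulative perplexity after that many chunks, and the final line reports the same quantity over all 552 chunks together with a standard-error band. A minimal sketch of how such an estimate is formed from per-token log-probabilities; this is illustrative only, not llama-perplexity's exact implementation:

import math

def perplexity(logprobs):
    """PPL = exp(mean negative log-likelihood); +/- band from the standard error of the mean."""
    nll = [-lp for lp in logprobs]                          # per-token negative log-likelihood
    mean = sum(nll) / len(nll)
    var = sum((x - mean) ** 2 for x in nll) / (len(nll) - 1)
    sem = math.sqrt(var / len(nll))                         # standard error of the mean NLL
    return math.exp(mean), math.exp(mean) * sem             # value and approximate +/- band

The log's final line, 7.8743 +/- 0.05993, is this kind of estimate taken over all tokens evaluated in the 552 chunks.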