#!/usr/bin/env bash

# Optional host tuning before a run (uncomment to apply): disable
# automatic NUMA balancing so pages stay on the node we bind to, then
# drop the page cache so the model is read fresh from storage.
# echo 0 | sudo tee /proc/sys/kernel/numa_balancing
# sudo sync; echo 3 | sudo tee /proc/sys/vm/drop_caches
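# Quick sanity check (a sketch, assuming a Linux host): confirm the
# tunable took effect and that the page cache was actually dropped.
# cat /proc/sys/kernel/numa_balancing   # expect 0 after the tee above
# free -h                               # buff/cache should be near zero right after dropping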
# Model to evaluate: point at the first shard of a multi-file GGUF and
# llama.cpp picks up the remaining splits automatically (the log below
# notes "additional 8 GGUFs metadata loaded").
model=/mnt/data/models/ubergarm/Step-3.5-Flash-GGUF/Step-3.5-Flash-288x7.4B-BF16-00001-of-00009.gguf
#model=/mnt/data/models/ubergarm/Step-3.5-Flash-GGUF/Step-3.5-Flash-Q8_0.gguf
#model=/mnt/data/models/stepfun-ai/Step-3.5-Flash-Int4/step3p5_flash_Q4_K_S-00001-of-00012.gguf
#model=/mnt/raid/hf/Step-3.5-Flash-GGUF/IQ4_XS/Step-3.5-Flash-IQ4_XS-00001-of-00004.gguf
#model=/mnt/raid/hf/Step-3.5-Flash-GGUF/IQ5_K/Step-3.5-Flash-IQ5_K-00001-of-00004.gguf
#model=/mnt/data/models/ubergarm/Step-3.5-Flash-GGUF/Step-3.5-Flash-IQ3_KS.gguf
#model=/mnt/data/models/ubergarm/Step-3.5-Flash-GGUF/Step-3.5-Flash-smol-IQ3_KS.gguf
#model=/mnt/data/models/ubergarm/Step-3.5-Flash-GGUF/Step-3.5-Flash-IQ2_KL.gguf
# Require SOCKET (the NUMA node to bind to) to be set in the environment.
if [[ -z "${SOCKET}" ]]; then
    # Unset or empty: report the problem on stderr and abort.
    echo "Error: The SOCKET environment variable is not set." >&2
    exit 1
else
    # Set: echo the value and continue with the run.
    echo "SOCKET is set to: ${SOCKET}"
fi
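# Example invocation (the filename is hypothetical; use whatever this
# script is saved as):
#   SOCKET=1 ./ppl-step35-flash.sh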
# Pin execution (-N) and memory allocation (-m) to the chosen NUMA node,
# then measure perplexity over wiki.test.raw.
numactl -N "$SOCKET" -m "$SOCKET" \
    ./build/bin/llama-perplexity \
    -m "$model" \
    -f wiki.test.raw \
    --seed 1337 \
    --ctx-size 512 \
    -ub 4096 -b 4096 \
    --numa numactl \
    --threads 96 \
    --threads-batch 128 \
    --validate-quants \
    --no-mmap
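To pick sane values for SOCKET, --threads, and --threads-batch on a given box, it helps to inspect the NUMA layout first. A quick sketch using standard numactl/util-linux tools (nothing here is specific to this script):

numactl --hardware    # nodes, the CPUs on each, and per-node free memory
lscpu | grep -i numa  # node count and CPU ranges at a glance

Output of the BF16 run follows.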
SOCKET is set to: 1
main: build = 4186 (82c4f273)
main: built with cc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0 for x86_64-linux-gnu
main: seed = 1337
CPU: using device CPU - 0 MiB free
llama_model_loader: additional 8 GGUFs metadata loaded.
llama_model_loader: loaded meta data with 50 key-value pairs and 754 tensors from /mnt/data/models/ubergarm/Step-3.5-Flash-GGUF/Step-3.5-Flash-288x7.4B-BF16-00001-of-00009.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = step35
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = Step 3.5 Flash
llama_model_loader: - kv 3: general.size_label str = 288x7.4B
llama_model_loader: - kv 4: general.license str = apache-2.0
llama_model_loader: - kv 5: general.base_model.count u32 = 1
llama_model_loader: - kv 6: general.base_model.0.name str = Step 3.5 Flash
llama_model_loader: - kv 7: general.base_model.0.organization str = Stepfun Ai
llama_model_loader: - kv 8: general.base_model.0.repo_url str = https:
llama_model_loader: - kv 9: step35.block_count u32 = 45
llama_model_loader: - kv 10: step35.context_length u32 = 262144
llama_model_loader: - kv 11: step35.embedding_length u32 = 4096
llama_model_loader: - kv 12: step35.feed_forward_length u32 = 11264
llama_model_loader: - kv 13: step35.attention.head_count arr[i32,45] = [64, 96, 96, 96, 64, 96, 96, 96, 64, ...
llama_model_loader: - kv 14: step35.rope.freq_base f32 = 5000000.000000
llama_model_loader: - kv 15: step35.rope.freq_base_swa f32 = 10000.000000
llama_model_loader: - kv 16: step35.expert_gating_func u32 = 2
llama_model_loader: - kv 17: step35.attention.key_length u32 = 128
llama_model_loader: - kv 18: step35.attention.value_length u32 = 128
llama_model_loader: - kv 19: general.file_type u32 = 32
llama_model_loader: - kv 20: step35.attention.head_count_kv arr[i32,45] = [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, ...
llama_model_loader: - kv 21: step35.attention.sliding_window u32 = 512
llama_model_loader: - kv 22: step35.attention.sliding_window_pattern arr[i32,45] = [0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, ...
llama_model_loader: - kv 23: step35.expert_count u32 = 288
llama_model_loader: - kv 24: step35.expert_used_count u32 = 8
llama_model_loader: - kv 25: step35.expert_feed_forward_length u32 = 1280
llama_model_loader: - kv 26: step35.expert_shared_feed_forward_length u32 = 1280
llama_model_loader: - kv 27: step35.expert_weights_scale f32 = 3.000000
llama_model_loader: - kv 28: step35.expert_weights_norm bool = true
llama_model_loader: - kv 29: step35.leading_dense_block_count u32 = 3
llama_model_loader: - kv 30: step35.moe_every_n_layers u32 = 1
llama_model_loader: - kv 31: step35.attention.layer_norm_rms_epsilon f32 = 0.000010
llama_model_loader: - kv 32: step35.swiglu_clamp_exp arr[f32,45] = [0.000000, 0.000000, 0.000000, 0.0000...
llama_model_loader: - kv 33: step35.swiglu_clamp_shexp arr[f32,45] = [0.000000, 0.000000, 0.000000, 0.0000...
llama_model_loader: - kv 34: general.quantization_version u32 = 2
llama_model_loader: - kv 35: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 36: tokenizer.ggml.pre str = deepseek-v3
llama_model_loader: - kv 37: tokenizer.ggml.tokens arr[str,128896] = ["<|begin▁of▁sentence|>", "<�...
llama_model_loader: - kv 38: tokenizer.ggml.token_type arr[i32,128896] = [3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 39: tokenizer.ggml.merges arr[str,127741] = ["Ġ t", "Ġ a", "i n", "Ġ Ġ", "h e...
llama_model_loader: - kv 40: tokenizer.ggml.bos_token_id u32 = 0
llama_model_loader: - kv 41: tokenizer.ggml.eos_token_id u32 = 128007
llama_model_loader: - kv 42: tokenizer.ggml.padding_token_id u32 = 1
llama_model_loader: - kv 43: tokenizer.ggml.add_bos_token bool = true
llama_model_loader: - kv 44: tokenizer.ggml.add_sep_token bool = false
llama_model_loader: - kv 45: tokenizer.ggml.add_eos_token bool = false
llama_model_loader: - kv 46: tokenizer.chat_template str = {% macro render_content(content) %}{%...
llama_model_loader: - kv 47: split.no u16 = 0
llama_model_loader: - kv 48: split.count u16 = 9
llama_model_loader: - kv 49: split.tensors.count i32 = 754
llama_model_loader: - type f32: 266 tensors
llama_model_loader: - type bf16: 488 tensors
load: printing all EOG tokens:
load: - 128007 ('<|im_end|>')
load: special tokens cache size = 818
load: token to piece cache size = 0.8220 MB
llm_load_print_meta: format = GGUF V3 (latest)
llm_load_print_meta: arch = step35
llm_load_print_meta: n_ctx_train = 262144
llm_load_print_meta: n_embd = 4096
llm_load_print_meta: n_layer = 45
llm_load_print_meta: n_head = [64, 96, 96, 96, 64, 96, 96, 96, 64, 96, 96, 96, 64, 96, 96, 96, 64, 96, 96, 96, 64, 96, 96, 96, 64, 96, 96, 96, 64, 96, 96, 96, 64, 96, 96, 96, 64, 96, 96, 96, 64, 96, 96, 96, 64]
llm_load_print_meta: n_head_kv = 8
llm_load_print_meta: n_rot = 128
llm_load_print_meta: n_swa = 512
llm_load_print_meta: n_swa_pattern = 1
llm_load_print_meta: n_embd_head_k = 128
llm_load_print_meta: n_embd_head_v = 128
llm_load_print_meta: n_gqa = [8, 12, 12, 12, 8, 12, 12, 12, 8, 12, 12, 12, 8, 12, 12, 12, 8, 12, 12, 12, 8, 12, 12, 12, 8, 12, 12, 12, 8, 12, 12, 12, 8, 12, 12, 12, 8, 12, 12, 12, 8, 12, 12, 12, 8]
llm_load_print_meta: n_embd_k_gqa = 1024
llm_load_print_meta: n_embd_v_gqa = 1024
llm_load_print_meta: f_norm_eps = 0.0e+00
llm_load_print_meta: f_norm_rms_eps = 1.0e-05
llm_load_print_meta: f_clamp_kqv = 0.0e+00
llm_load_print_meta: f_max_alibi_bias = 0.0e+00
llm_load_print_meta: f_logit_scale = 0.0e+00
llm_load_print_meta: n_ff = 11264
llm_load_print_meta: n_expert = 288
llm_load_print_meta: n_expert_used = 8
llm_load_print_meta: causal attn = 1
llm_load_print_meta: pooling type = 0
llm_load_print_meta: rope type = 2
llm_load_print_meta: rope scaling = linear
llm_load_print_meta: freq_base_train = 5000000.0
llm_load_print_meta: freq_scale_train = 1
llm_load_print_meta: n_ctx_orig_yarn = 262144
llm_load_print_meta: rope_finetuned = unknown
llm_load_print_meta: ssm_d_conv = 0
llm_load_print_meta: ssm_d_inner = 0
llm_load_print_meta: ssm_d_state = 0
llm_load_print_meta: ssm_dt_rank = 0
llm_load_print_meta: model type = ?B
llm_load_print_meta: model ftype = BF16
llm_load_print_meta: model params = 196.956 B
llm_load_print_meta: model size = 366.952 GiB (16.004 BPW)
llm_load_print_meta: repeating layers = 364.986 GiB (16.004 BPW, 195.900 B parameters)
llm_load_print_meta: general.name = Step 3.5 Flash
print_info: vocab type = BPE
print_info: n_vocab = 128896
print_info: n_merges = 127741
print_info: BOS token = 0 '<|begin▁of▁sentence|>'
print_info: EOS token = 128007 '<|im_end|>'
print_info: EOT token = 128007 '<|im_end|>'
print_info: PAD token = 1 '<|end▁of▁sentence|>'
print_info: LF token = 201 'Ċ'
print_info: FIM PRE token = 128801 '<|fim▁begin|>'
print_info: FIM SUF token = 128800 '<|fim▁hole|>'
print_info: FIM MID token = 128802 '<|fim▁end|>'
print_info: EOG token = 128007 '<|im_end|>'
print_info: max token length = 256
llm_load_tensors: ggml ctx size = 0.31 MiB
llm_load_tensors: offloading 0 repeating layers to GPU
llm_load_tensors: offloaded 0/46 layers to GPU
llm_load_tensors: CPU buffer size = 375759.27 MiB
....................................................................................................
llama_new_context_with_model: n_ctx = 4096
llama_new_context_with_model: n_batch = 4096
llama_new_context_with_model: n_ubatch = 4096
llama_new_context_with_model: flash_attn = 1
llama_new_context_with_model: attn_max_b = 0
llama_new_context_with_model: fused_moe = 1
llama_new_context_with_model: grouped er = 0
llama_new_context_with_model: fused_up_gate = 1
llama_new_context_with_model: fused_mmad = 1
llama_new_context_with_model: rope_cache = 0
llama_new_context_with_model: graph_reuse = 1
llama_new_context_with_model: k_cache_hadam = 0
llama_new_context_with_model: split_mode_graph_scheduling = 0
llama_new_context_with_model: reduce_type = f16
llama_new_context_with_model: sched_async = 0
llama_new_context_with_model: ser = -1, 0
llama_new_context_with_model: freq_base = 5000000.0
llama_new_context_with_model: freq_scale = 1
llama_kv_cache_init: CPU KV buffer size = 720.00 MiB
llama_new_context_with_model: KV self size = 720.00 MiB, K (f16): 360.00 MiB, V (f16): 360.00 MiB
llama_new_context_with_model: CPU output buffer size = 3.93 MiB
llama_new_context_with_model: CPU compute buffer size = 2078.00 MiB
llama_new_context_with_model: graph nodes = 2201
llama_new_context_with_model: graph splits = 1
XXXXXXXXXXXXXXXXXXXXX Setting only active experts offload

system_info: n_threads = 96 (n_threads_batch = 128) / 512 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | FMA = 1 | NEON = 0 | SVE = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | MATMUL_INT8 = 0 |
perplexity: tokenizing the input ..
perplexity: tokenization took 723.567 ms
perplexity: calculating perplexity over 561 chunks, n_ctx=512, batch_size=4096, n_seq=8
perplexity: 15.47 seconds per pass - ETA 18.07 minutes
===================================== llama_new_context_with_model: f16
======================================= HAVE_FANCY_SIMD is defined
[1]1.5125,[2]1.9280,[3]1.6178,[4]1.4760,[5]1.4000,[6]1.3378,[7]1.3006,[8]1.2759,[9]1.2557,[10]1.2356,[11]1.2434,[12]1.2544,[13]1.2647,[14]1.3110,[15]1.3541,[16]1.3996,[17]1.5016,[18]1.5846,[19]1.5731,[20]1.5561,[21]1.5612,[22]1.5507,[23]1.5331,[24]1.5278,[25]1.5172,[26]1.5098,[27]1.5009,[28]1.4958,[29]1.4917,[30]1.4977,[31]1.4967,[32]1.4862,[33]1.4809,[34]1.4913,[35]1.4946,[36]1.5053,[37]1.5355,[38]1.5694,[39]1.6011,[40]1.6472,[41]1.6760,[42]1.6825,[43]1.7172,[44]1.7369,[45]1.7776,[46]1.8155,[47]1.8165,[48]1.8110,[49]1.8061,[50]1.7953,[51]1.8169,[52]1.8152,[53]1.8328,[54]1.8418,[55]1.8548,[56]1.8631,[57]1.8643,[58]1.8699,[59]1.8768,[60]1.8919,[61]1.8872,[62]1.9143,[63]1.9293,[64]1.9433,[65]1.9454,[66]1.9417,[67]1.9391,[68]1.9453,[69]1.9449,[70]1.9460,[71]1.9421,[72]1.9417,[73]1.9501,[74]1.9627,[75]1.9628,[76]1.9498,[77]1.9417,[78]1.9370,[79]1.9331,[80]1.9279,[81]1.9234,[82]1.9262,[83]1.9219,[84]1.9180,[85]1.9127,[86]1.9152,[87]1.9229,[88]1.9163,[89]1.9171,[90]1.9167,[91]1.9127,[92]1.9089,[93]1.9056,[94]1.9002,[95]1.9012,[96]1.9053,[97]1.9162,[98]1.9152,[99]1.9091,[100]1.9069,[101]1.9066,[102]1.9153,[103]1.9198,[104]1.9363,[105]1.9437,[106]1.9683,[107]1.9908,[108]2.0093,[109]2.0375,[110]2.0637,[111]2.0878,[112]2.0815,[113]2.0835,[114]2.0887,[115]2.0900,[116]2.0982,[117]2.0991,[118]2.1001,[119]2.0971,[120]2.0958,[121]2.0988,[122]2.0957,[123]2.0949,[124]2.0910,[125]2.0874,[126]2.0863,[127]2.0868,[128]2.0853,[129]2.0883,[130]2.0891,[131]2.0895,[132]2.0910,[133]2.1011,[134]2.1063,[135]2.1041,[136]2.1007,[137]2.0981,[138]2.0948,[139]2.0931,[140]2.0920,[141]2.0920,[142]2.0917,[143]2.0939,[144]2.0946,[145]2.0887,[146]2.0841,[147]2.0816,[148]2.0776,[149]2.0759,[150]2.0711,[151]2.0657,[152]2.0632,[153]2.0603,[154]2.0590,[155]2.0579,[156]2.0557,[157]2.0555,[158]2.0547,[159]2.0544,[160]2.0526,[161]2.0621,[162]2.0724,[163]2.0757,[164]2.0811,[165]2.0870,[166]2.0970,[167]2.0994,[168]2.1125,[169]2.1201,[170]2.1320,[171]2.1389,[172]2.1361,[173]2.1299,[174]2.1337,[175]2.1363,[176]2.1380,[177]2.1385,[178]2.1385,[179]2.1403,[180]2.1426,[181]2.1545,[182]2.1659,[183]2.1785,[184]2.1920,[185]2.2015,[186]2.2151,[187]2.2300,[188]2.2433,[189]2.2494,[190]2.2499,[191]2.2526,[192]2.2557,[193]2.2550,[194]2.2580,[195]2.2577,[196]2.2627,[197]2.2682,[198]2.2709,[199]2.2707,[200]2.2704,[201]2.2808,[202]2.2752,[203]2.2756,[204]2.2758,[205]2.2770,[206]2.2777,[207]2.2782,[208]2.2809,[209]2.2834,[210]2.2825,[211]2.2798,[212]2.2796,[213]2.2796,[214]2.2784,[215]2.2749,[216]2.2745,[217]2.2700,[218]2.2682,[219]2.2687,[220]2.2680,[221]2.2684,[222]2.2646,[223]2.2630,[224]2.2663,[225]2.2666,[226]2.2632,[227]2.2648,[228]2.2669,[229]2.2686,[230]2.2755,[231]2.2821,[232]2.2807,[233]2.2788,[234]2.2786,[235]2.2789,[236]2.2814,[237]2.2857,[238]2.2896,[239]2.2969,[240]2.3025,[241]2.3097,[242]2.3165,[243]2.3226,[244]2.3274,[245]2.3365,[246]2.3413,[247]2.3413,[248]2.3395,[249]2.3395,[250]2.3363,[251]2.3351,[252]2.3388,[253]2.3440,[254]2.3505,[255]2.3528,[256]2.3542,[257]2.3561,[258]2.3563,[259]2.3555,[260]2.3564,[261]2.3564,[262]2.3564,[263]2.3570,[264]2.3558,[265]2.3556,[266]2.3568,[267]2.3584,[268]2.3604,[269]2.3629,[270]2.3620,[271]2.3645,[272]2.3624,[273]2.3610,[274]2.3581,[275]2.3584,[276]2.3541,[277]2.3568,[278]2.3644,[279]2.3720,[280]2.3785,[281]2.3816,[282]2.3827,[283]2.3869,[284]2.3908,[285]2.3995,[286]2.3997,[287]2.4026,[288]2.4078,[289]2.4094,[290]2.4075,[291]2.4083,[292]2.4165,[293]2.4195,[294]2.4216,[295]2.4238,[296]2.4268,[297]2.4273,[298]2.4297,[299]2.4306,[300]2.4315,[301]2.4335,[302]2.4351,[303]2.4356,[304]2.4357,[305]2.4437,[306]2.4474,[307]2.4559,[308]2.4506,[309]2.4480,[310]2.4432,[311]2.4426,[312]2.4399,[313]2.4376,[314]2.4357,[315]2.4354,[316]2.4353,[317]2.4330,[318]2.4308,[319]2.4298,[320]2.4300,[321]2.4270,[322]2.4274,[323]2.4282,[324]2.4255,[325]2.4236,[326]2.4203,[327]2.4176,[328]2.4185,[329]2.4184,[330]2.4218,[331]2.4228,[332]2.4261,[333]2.4255,[334]2.4253,[335]2.4257,[336]2.4261,[337]2.4274,[338]2.4280,[339]2.4294,[340]2.4319,[341]2.4356,[342]2.4404,[343]2.4458,[344]2.4486,[345]2.4474,[346]2.4446,[347]2.4455,[348]2.4445,[349]2.4417,[350]2.4408,[351]2.4423,[352]2.4415,[353]2.4421,[354]2.4420,[355]2.4420,[356]2.4403,[357]2.4410,[358]2.4415,[359]2.4387,[360]2.4372,[361]2.4374,[362]2.4370,[363]2.4360,[364]2.4361,[365]2.4331,[366]2.4331,[367]2.4333,[368]2.4315,[369]2.4314,[370]2.4304,[371]2.4320,[372]2.4343,[373]2.4323,[374]2.4299,[375]2.4292,[376]2.4321,[377]2.4358,[378]2.4335,[379]2.4320,[380]2.4310,[381]2.4324,[382]2.4333,[383]2.4354,[384]2.4386,[385]2.4416,[386]2.4447,[387]2.4495,[388]2.4516,[389]2.4481,[390]2.4448,[391]2.4411,[392]2.4397,[393]2.4389,[394]2.4375,[395]2.4345,[396]2.4323,[397]2.4286,[398]2.4258,[399]2.4222,[400]2.4188,[401]2.4144,[402]2.4113,[403]2.4074,[404]2.4042,[405]2.4002,[406]2.3964,[407]2.3934,[408]2.3907,[409]2.3869,[410]2.3861,[411]2.3874,[412]2.3864,[413]2.3888,[414]2.3894,[415]2.3861,[416]2.3825,[417]2.3850,[418]2.3814,[419]2.3801,[420]2.3776,[421]2.3747,[422]2.3706,[423]2.3670,[424]2.3663,[425]2.3636,[426]2.3602,[427]2.3576,[428]2.3562,[429]2.3537,[430]2.3505,[431]2.3470,[432]2.3453,[433]2.3430,[434]2.3409,[435]2.3391,[436]2.3380,[437]2.3377,[438]2.3381,[439]2.3395,[440]2.3425,[441]2.3478,[442]2.3534,[443]2.3516,[444]2.3511,[445]2.3515,[446]2.3537,[447]2.3564,[448]2.3580,[449]2.3595,[450]2.3612,[451]2.3634,[452]2.3642,[453]2.3656,[454]2.3641,[455]2.3664,[456]2.3674,[457]2.3700,[458]2.3738,[459]2.3739,[460]2.3745,[461]2.3727,[462]2.3734,[463]2.3768,[464]2.3811,[465]2.3792,[466]2.3804,[467]2.3820,[468]2.3835,[469]2.3839,[470]2.3849,[471]2.3872,[472]2.3892,[473]2.3895,[474]2.3912,[475]2.3928,[476]2.3930,[477]2.3936,[478]2.3945,[479]2.3961,[480]2.3975,[481]2.3948,[482]2.3958,[483]2.3948,[484]2.3977,[485]2.4024,[486]2.4038,[487]2.4061,[488]2.4079,[489]2.4099,[490]2.4128,[491]2.4155,[492]2.4188,[493]2.4186,[494]2.4172,[495]2.4168,[496]2.4166,[497]2.4169,[498]2.4168,[499]2.4157,[500]2.4170,[501]2.4207,[502]2.4199,[503]2.4202,[504]2.4209,[505]2.4228,[506]2.4245,[507]2.4259,[508]2.4280,[509]2.4251,[510]2.4246,[511]2.4238,[512]2.4222,[513]2.4199,[514]2.4195,[515]2.4192,[516]2.4170,[517]2.4164,[518]2.4161,[519]2.4153,[520]2.4149,[521]2.4149,[522]2.4137,[523]2.4146,[524]2.4141,[525]2.4148,[526]2.4135,[527]2.4115,[528]2.4113,[529]2.4105,[530]2.4100,[531]2.4090,[532]2.4065,[533]2.4042,[534]2.4025,[535]2.4023,[536]2.4037,[537]2.4056,[538]2.4072,[539]2.4089,[540]2.4121,[541]2.4149,[542]2.4175,[543]2.4190,[544]2.4184,[545]2.4186,[546]2.4160,[547]2.4136,[548]2.4108,[549]2.4085,[550]2.4071,[551]2.4058,[552]2.4042,[553]2.4031,[554]2.4033,[555]2.4028,[556]2.4056,[557]2.4078,[558]2.4111,[559]2.4132,[560]2.4173,[561]2.4169,
llama_print_timings: load time = 168747.99 ms
llama_print_timings: sample time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_print_timings: prompt eval time = 879771.94 ms / 287232 tokens ( 3.06 ms per token, 326.48 tokens per second)
llama_print_timings: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_print_timings: total time = 890647.44 ms / 287233 tokens

Final estimate: PPL over 561 chunks for n_ctx=512 = 2.4169 +/- 0.01107
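The reported perplexity is the exponential of the mean negative log-likelihood per token over the 561 chunks, and the +/- value is the standard error of that estimate, which gives a rough sense of whether two quants differ meaningfully. When running this script across the quant list above, a sketch like the following (the ppl-*.log filenames are hypothetical) pulls the final numbers side by side:

for log in ppl-*.log; do
    printf '%s\t' "$log"                 # run name
    grep 'Final estimate' "$log" \
        | awk '{print $(NF-2), $(NF-1), $NF}'   # e.g. "2.4169 +/- 0.01107"
done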